From 86ba1e703db3eabc1b632c3c22794c12883315a3 Mon Sep 17 00:00:00 2001 From: tristanlatr Date: Fri, 20 May 2022 02:49:51 -0400 Subject: [PATCH 01/40] Refactor the ASTBuilder to get rid of the currentAttr attribute. This attribute was used to attach the right docstring node to the right Attribute object. Now it uses AST node navigation (with the .parent attribute) instead for fetching the docstring node for an ast.Assign. This change might not be worth it, on the one hand it removes an attribute being mutated at different places in the code, but replaces this kind of "unsafe" state tracking (meaning not with pop() and push()) by some more verbose solution that involves adding the .parent attribute on all nodes. The zopeinterface extension needed to be adjusted as well because it relied on the docstring assignment feature in an implicit way, now it's explicit what we're doing. --- pydoctor/astbuilder.py | 152 ++++++++++++++++---------- pydoctor/astutils.py | 46 +++++++- pydoctor/epydoc/markup/_pyval_repr.py | 24 +--- pydoctor/extensions/zopeinterface.py | 16 ++- pydoctor/test/test_astutils.py | 15 +++ pydoctor/test/test_zopeinterface.py | 4 +- 6 files changed, 178 insertions(+), 79 deletions(-) create mode 100644 pydoctor/test/test_astutils.py diff --git a/pydoctor/astbuilder.py b/pydoctor/astbuilder.py index a1e2d4831..2e7d20e18 100644 --- a/pydoctor/astbuilder.py +++ b/pydoctor/astbuilder.py @@ -15,7 +15,10 @@ import astor from pydoctor import epydoc2stan, model, node2stan from pydoctor.epydoc.markup._pyval_repr import colorize_inline_pyval -from pydoctor.astutils import bind_args, node2dottedname, node2fullname, is__name__equals__main__, NodeVisitor +from pydoctor.astutils import (bind_args, node2dottedname, node2fullname, is__name__equals__main__, + get_assign_docstring_node, parentage_ast_tree, NodeVisitor) + + def parseFile(path: Path) -> ast.Module: """Parse the contents of a Python source file.""" @@ -24,9 +27,13 @@ def parseFile(path: Path) -> ast.Module: 
return _parse(src, filename=str(path)) if sys.version_info >= (3,8): - _parse = partial(ast.parse, type_comments=True) + _ast_parse = partial(ast.parse, type_comments=True) else: - _parse = ast.parse + _ast_parse = ast.parse + +def _parse(source: Union[str, bytes], **kwargs:Any) -> ast.Module: + mod = _ast_parse(source, **kwargs) + return parentage_ast_tree(mod) def _maybeAttribute(cls: model.Class, name: str) -> bool: @@ -40,6 +47,8 @@ def _maybeAttribute(cls: model.Class, name: str) -> bool: obj = cls.find(name) return obj is None or isinstance(obj, model.Attribute) +class SkipInlineDocstring(Exception): + ... def _handleAliasing( ctx: model.CanContainImportsDocumentable, @@ -539,7 +548,7 @@ def _handleModuleVar(self, obj = parent.contents.get(target) if obj is None: - obj = self.builder.addAttribute(name=target, kind=None, parent=parent) + obj = self.builder.addAttribute(name=target, kind=None, parent=parent, lineno=lineno) # If it's not an attribute it means that the name is already denifed as function/class # probably meaning that this attribute is a bound callable. @@ -551,15 +560,16 @@ def _handleModuleVar(self, # We don't know how to handle this, # so we ignore it to document the original object. This means that we might document arguments # that are in reality not existing because they have values in a partial() call for instance. + # TODO: Should we report a warning? if not isinstance(obj, model.Attribute): - return + # Skips inline docsrings assigments + raise SkipInlineDocstring() if annotation is None and expr is not None: annotation = _infer_type(expr) obj.annotation = annotation - obj.setLineNumber(lineno) if is_constant(obj): self._handleConstant(obj=obj, value=expr, lineno=lineno) @@ -569,8 +579,6 @@ def _handleModuleVar(self, # check if they have been initialized or not. 
obj.value = expr - self.builder.currentAttr = obj - def _handleAssignmentInModule(self, target: str, annotation: Optional[ast.expr], @@ -591,13 +599,13 @@ def _handleClassVar(self, cls = self.builder.current assert isinstance(cls, model.Class) if not _maybeAttribute(cls, name): - return + raise SkipInlineDocstring() # Class variables can only be Attribute, so it's OK to cast obj = cast(Optional[model.Attribute], cls.contents.get(name)) if obj is None: - obj = self.builder.addAttribute(name=name, kind=None, parent=cls) + obj = self.builder.addAttribute(name=name, kind=None, parent=cls, lineno=lineno) if obj.kind is None: instance = is_attrib(expr, cls) or ( @@ -615,41 +623,43 @@ def _handleClassVar(self, annotation = _infer_type(expr) obj.annotation = annotation - obj.setLineNumber(lineno) if is_constant(obj): self._handleConstant(obj=obj, value=expr, lineno=lineno) else: obj.value = expr - - self.builder.currentAttr = obj - + + def _getClassFromMethodContext(self) -> Optional[model.Class]: + func = self.builder.current + if not isinstance(func, model.Function): + return None + cls = func.parent + if not isinstance(cls, model.Class): + return None + return cls + def _handleInstanceVar(self, name: str, annotation: Optional[ast.expr], expr: Optional[ast.expr], lineno: int ) -> None: - func = self.builder.current - if not isinstance(func, model.Function): - return - cls = func.parent - if not isinstance(cls, model.Class): + cls = self._getClassFromMethodContext() + if not cls: return if not _maybeAttribute(cls, name): - return + raise SkipInlineDocstring() # Class variables can only be Attribute, so it's OK to cast because we used _maybeAttribute() above. 
obj = cast(Optional[model.Attribute], cls.contents.get(name)) if obj is None: - obj = self.builder.addAttribute(name=name, kind=None, parent=cls) + obj = self.builder.addAttribute(name=name, kind=None, parent=cls, lineno=lineno) if annotation is None and expr is not None: annotation = _infer_type(expr) obj.annotation = annotation - obj.setLineNumber(lineno) # Maybe an instance variable overrides a constant, # so we check before setting the kind to INSTANCE_VARIABLE. @@ -659,7 +669,6 @@ def _handleInstanceVar(self, obj.kind = model.DocumentableKind.INSTANCE_VARIABLE obj.value = expr - self.builder.currentAttr = obj def _handleAssignmentInClass(self, target: str, @@ -752,25 +761,54 @@ def visit_Assign(self, node: ast.Assign) -> None: annotation = self._unstring_annotation(ast.Str(type_comment, lineno=lineno)) for target in node.targets: - if isinstance(target, ast.Tuple): - for elem in target.elts: - # Note: We skip type and aliasing analysis for this case, - # but we do record line numbers. - self._handleAssignment(elem, None, None, lineno) + try: + if isinstance(target, ast.Tuple): + for elem in target.elts: + # Note: We skip type and aliasing analysis for this case, + # but we do record line numbers. 
+ self._handleAssignment(elem, None, None, lineno) + else: + self._handleAssignment(target, annotation, expr, lineno) + except SkipInlineDocstring: + continue else: - self._handleAssignment(target, annotation, expr, lineno) + self._handleInlineDocstrings(node, target) def visit_AnnAssign(self, node: ast.AnnAssign) -> None: annotation = self._unstring_annotation(node.annotation) - self._handleAssignment(node.target, annotation, node.value, node.lineno) + try: + self._handleAssignment(node.target, annotation, node.value, node.lineno) + except SkipInlineDocstring: + return + else: + self._handleInlineDocstrings(node, node.target) + + def _handleInlineDocstrings(self, assign:Union[ast.Assign, ast.AnnAssign], target:ast.expr) -> None: + # Process the inline docstrings + dottedname = node2dottedname(target) + if not dottedname: + return + + parent = self.builder.current + if dottedname[0] == 'self': + dottedname = dottedname[1:] + parent = self._getClassFromMethodContext() + if not parent: + return + + if len(dottedname) != 1: + return + + docstring_node = get_assign_docstring_node(assign) + if docstring_node: + + # fetch the target of the inline docstring + attr = parent.contents.get(dottedname[0]) + if attr: + attr.setDocstring(docstring_node) def visit_Expr(self, node: ast.Expr) -> None: - value = node.value - if isinstance(value, ast.Str): - attr = self.builder.currentAttr - if attr is not None: - attr.setDocstring(value) - self.builder.currentAttr = None + # Visit's ast.Expr.value with the visitor, used by extensions to visit top-level calls. 
self.generic_visit(node) @@ -903,8 +941,10 @@ def _handlePropertyDef(self, lineno: int ) -> model.Attribute: - attr = self.builder.addAttribute(name=node.name, kind=model.DocumentableKind.PROPERTY, parent=self.builder.current) - attr.setLineNumber(lineno) + attr = self.builder.addAttribute(name=node.name, + kind=model.DocumentableKind.PROPERTY, + parent=self.builder.current, + lineno=lineno) if docstring is not None: attr.setDocstring(docstring) @@ -1044,7 +1084,7 @@ class _AnnotationStringParser(ast.NodeTransformer): """ def _parse_string(self, value: str) -> ast.expr: - statements = ast.parse(value).body + statements = _parse(value).body if len(statements) != 1: raise SyntaxError("expected expression, found multiple statements") stmt, = statements @@ -1148,28 +1188,31 @@ class ASTBuilder: def __init__(self, system: model.System): self.system = system - self.current = cast(model.Documentable, None) # current visited object - self.currentMod: Optional[model.Module] = None # module, set when visiting ast.Module - self.currentAttr: Optional[model.Documentable] = None # recently visited attribute object + self.current = cast(model.Documentable, None) # current visited object. + self.currentMod: Optional[model.Module] = None # current module, set when visiting ast.Module. self._stack: List[model.Documentable] = [] self.ast_cache: Dict[Path, Optional[ast.Module]] = {} - - def _push(self, cls: Type[DocumentableT], name: str, lineno: int) -> DocumentableT: + def _push(self, + cls: Type[DocumentableT], + name: str, + lineno: int, + parent:Optional[model.Documentable]=None) -> DocumentableT: """ Create and enter a new object of the given type and add it to the system. + + @param parent: Parent of the new documentable instance, it will use self.current if unspecified. + Used for attributes declared in methods, typically ``__init__``. 
""" - obj = cls(self.system, name, self.current) + obj = cls(self.system, name, parent or self.current) self.system.addObject(obj) self.push(obj, lineno) - self.currentAttr = None return obj def _pop(self, cls: Type[model.Documentable]) -> None: assert isinstance(self.current, cls) self.pop(self.current) - self.currentAttr = None def push(self, obj: model.Documentable, lineno: int) -> None: """ @@ -1224,18 +1267,17 @@ def popFunction(self) -> None: self._pop(self.system.Function) def addAttribute(self, - name: str, kind: Optional[model.DocumentableKind], parent: model.Documentable + name: str, + kind: Optional[model.DocumentableKind], + parent: model.Documentable, + lineno: int ) -> model.Attribute: """ - Add a new attribute to the system, attributes cannot be "entered". + Add a new attribute to the system. """ - system = self.system - parentMod = self.currentMod - attr = system.Attribute(system, name, parent) + attr = self._push(self.system.Attribute, name, lineno, parent=parent) + self._pop(self.system.Attribute) attr.kind = kind - attr.parentMod = parentMod - system.addObject(attr) - self.currentAttr = attr return attr def warning(self, message: str, detail: str) -> None: diff --git a/pydoctor/astutils.py b/pydoctor/astutils.py index e5aa88446..5a618fead 100644 --- a/pydoctor/astutils.py +++ b/pydoctor/astutils.py @@ -4,7 +4,7 @@ import sys from numbers import Number -from typing import Iterator, Optional, List, Iterable, Sequence, TYPE_CHECKING +from typing import Iterator, Optional, List, Iterable, Sequence, TYPE_CHECKING, TypeVar, Union from inspect import BoundArguments, Signature import ast @@ -155,3 +155,47 @@ def is_using_annotations(expr: Optional[ast.AST], if full_name in annotations: return True return False + +def get_assign_docstring_node(assign:Union[ast.Assign, ast.AnnAssign]) -> Optional[ast.Str]: + """ + Get the docstring for a L{ast.Assign} or L{ast.AnnAssign} node. 
+ + This helper function relies on the non-standard C{.parent} attribute on AST nodes + to navigate upward in the tree and determine this node direct siblings. + """ + parent_node = getattr(assign, 'parent', None) + + if not parent_node: + assert False, "The 'parent' attribute is not correctly set up on ast nodes." + + body = getattr(parent_node, 'body', None) + + if body: + assert isinstance(body, list) + assign_index = body.index(assign) + try: + right_sibling = body[assign_index+1] + except IndexError: + return None + if isinstance(right_sibling, ast.Expr) and \ + get_str_value(right_sibling.value) is not None: + return right_sibling.value + return None + +_AST = TypeVar('_AST', bound=ast.AST) +def parentage_ast_tree(node:_AST) ->_AST: + """ + Add C{parent} attribute to all ast nodes starting at C{node}. + """ + class _Parentage(ast.NodeTransformer): + # stolen from https://stackoverflow.com/a/68845448 + parent: Optional[ast.AST] = None + + def visit(self, node: _AST) -> _AST: + setattr(node, 'parent', self.parent) + self.parent = node + node = super().visit(node) + if isinstance(node, ast.AST): + self.parent = getattr(node, 'parent') + return node + return _Parentage().visit(node) diff --git a/pydoctor/epydoc/markup/_pyval_repr.py b/pydoctor/epydoc/markup/_pyval_repr.py index daf821bc5..4d5fd4c80 100644 --- a/pydoctor/epydoc/markup/_pyval_repr.py +++ b/pydoctor/epydoc/markup/_pyval_repr.py @@ -52,7 +52,7 @@ from pydoctor.epydoc.markup import DocstringLinker from pydoctor.epydoc.markup.restructuredtext import ParsedRstDocstring from pydoctor.epydoc.docutils import set_node_attributes, wbr, obj_reference -from pydoctor.astutils import node2dottedname, bind_args +from pydoctor.astutils import node2dottedname, bind_args, parentage_ast_tree from pydoctor.node2stan import gettext def decode_with_backslashreplace(s: bytes) -> str: @@ -113,21 +113,6 @@ def restore(self, mark: _MarkedColorizerState) -> List[nodes.Node]: del self.result[mark.length:] return trimmed 
-class _Parentage(ast.NodeTransformer): - """ - Add C{parent} attribute to ast nodes instances. - """ - # stolen from https://stackoverflow.com/a/68845448 - parent: Optional[ast.AST] = None - - def visit(self, node: ast.AST) -> ast.AST: - setattr(node, 'parent', self.parent) - self.parent = node - node = super().visit(node) - if isinstance(node, ast.AST): - self.parent = getattr(node, 'parent') - return node - # TODO: add support for comparators when needed. class _OperatorDelimiter: """ @@ -526,9 +511,10 @@ def _colorize_ast_constant(self, pyval: ast.AST, state: _ColorizerState) -> None self._output('...', self.ELLIPSIS_TAG, state) def _colorize_ast(self, pyval: ast.AST, state: _ColorizerState) -> None: - # Set nodes parent in order to check theirs precedences and add delimiters when needed. - if not getattr(pyval, 'parent', None): - _Parentage().visit(pyval) + # Check nodes parent is set. + # This is required to check precedences and add delimiters when needed. + if not hasattr(pyval, 'parent'): + pyval = parentage_ast_tree(pyval) if self._is_ast_constant(pyval): self._colorize_ast_constant(pyval, state) diff --git a/pydoctor/extensions/zopeinterface.py b/pydoctor/extensions/zopeinterface.py index 5db6c8b3f..853258797 100644 --- a/pydoctor/extensions/zopeinterface.py +++ b/pydoctor/extensions/zopeinterface.py @@ -183,16 +183,26 @@ def _handleZopeInterfaceAssignmentInModule(self, ob = self.visitor.system.objForFullName(funcName) if isinstance(ob, ZopeInterfaceClass) and ob.isinterfaceclass: # TODO: Process 'bases' and '__doc__' arguments. - # TODO: Currently, this implementation will create a duplicate class - # with the same name as the attribute, overriding it. 
+ + # Fetch older attr documentable + old_attr = self.visitor.builder.current.contents.get(target) + if old_attr: + self.visitor.builder.system._remove(old_attr) # avoid duplicate warning by simply removing the old item + interface = self.visitor.builder.pushClass(target, lineno) assert isinstance(interface, ZopeInterfaceClass) + + # the docstring node has already been attached to the documentable + # by the time the zopeinterface extension is run, so we fetch the right docstring info from old documentable. + if old_attr: + interface.docstring = old_attr.docstring + interface.docstring_lineno = old_attr.docstring_lineno + interface.isinterface = True interface.implementedby_directly = [] interface.bases = [] interface.baseobjects = [] self.visitor.builder.popClass() - self.visitor.builder.currentAttr = interface def _handleZopeInterfaceAssignmentInClass(self, target: str, diff --git a/pydoctor/test/test_astutils.py b/pydoctor/test/test_astutils.py new file mode 100644 index 000000000..98b65a93e --- /dev/null +++ b/pydoctor/test/test_astutils.py @@ -0,0 +1,15 @@ + +from pydoctor import astutils, astbuilder + +def test_parentage() -> None: + tree = astbuilder._parse('class f(b):...') + assert tree.body[0].parent == tree + assert tree.body[0].body[0].parent == tree.body[0] + assert tree.body[0].bases[0].parent == tree.body[0] + +def test_get_assign_docstring_node() -> None: + tree = astbuilder._parse('var = 1\n\n\n"inline docs"') + assert astutils.get_str_value(astutils.get_assign_docstring_node(tree.body[0])) == "inline docs" + + tree = astbuilder._parse('var:int = 1\n\n\n"inline docs"') + assert astutils.get_str_value(astutils.get_assign_docstring_node(tree.body[0])) == "inline docs" diff --git a/pydoctor/test/test_zopeinterface.py b/pydoctor/test/test_zopeinterface.py index 681385510..e0c65602e 100644 --- a/pydoctor/test/test_zopeinterface.py +++ b/pydoctor/test/test_zopeinterface.py @@ -158,7 +158,7 @@ class C(zi.Interface): assert captured == 'mod:5: definition 
of attribute "bad_attr" should have docstring as its sole argument\n' @zope_interface_systemcls_param -def test_interfaceclass(systemcls: Type[model.System]) -> None: +def test_interfaceclass(systemcls: Type[model.System], capsys: CapSys) -> None: system = processPackage('interfaceclass', systemcls=systemcls) mod = system.allobjects['interfaceclass.mod'] I = mod.contents['MyInterface'] @@ -170,6 +170,8 @@ def test_interfaceclass(systemcls: Type[model.System]) -> None: assert isinstance(J, ZopeInterfaceClass) assert J.isinterface + assert 'interfaceclass.mod duplicate' not in capsys.readouterr().out + @zope_interface_systemcls_param def test_warnerproofing(systemcls: Type[model.System]) -> None: src = ''' From 31b47b13e7cb5bb0b3f4c7242849aea805bd01ca Mon Sep 17 00:00:00 2001 From: tristanlatr Date: Fri, 20 May 2022 17:06:23 -0400 Subject: [PATCH 02/40] Continue a bit of refactor --- pydoctor/astbuilder.py | 35 ++++++++++++++++++++++++++-------- pydoctor/astutils.py | 10 +++++----- pydoctor/model.py | 2 +- pydoctor/test/test_astutils.py | 10 +++++----- 4 files changed, 38 insertions(+), 19 deletions(-) diff --git a/pydoctor/astbuilder.py b/pydoctor/astbuilder.py index 2e7d20e18..5dc104fb3 100644 --- a/pydoctor/astbuilder.py +++ b/pydoctor/astbuilder.py @@ -49,6 +49,10 @@ def _maybeAttribute(cls: model.Class, name: str) -> bool: class SkipInlineDocstring(Exception): ... +class InvalidName(Exception): + ... +class UsingSelfInUnknownContext(Exception): + ... 
def _handleAliasing( ctx: model.CanContainImportsDocumentable, @@ -596,6 +600,7 @@ def _handleClassVar(self, expr: Optional[ast.expr], lineno: int ) -> None: + cls = self.builder.current assert isinstance(cls, model.Class) if not _maybeAttribute(cls, name): @@ -669,7 +674,6 @@ def _handleInstanceVar(self, obj.kind = model.DocumentableKind.INSTANCE_VARIABLE obj.value = expr - def _handleAssignmentInClass(self, target: str, annotation: Optional[ast.expr], @@ -783,18 +787,33 @@ def visit_AnnAssign(self, node: ast.AnnAssign) -> None: else: self._handleInlineDocstrings(node, node.target) - def _handleInlineDocstrings(self, assign:Union[ast.Assign, ast.AnnAssign], target:ast.expr) -> None: - # Process the inline docstrings + def _reduceNameInContext(self, target:ast.expr) -> Tuple[List[str], model.Documentable]: + """ + If the current context is a method, strip the C{'self.'} part of assignment names and return + the right L{Class} context in which to use the new name. The new name maybe dotted. + + @returns: Tuple C{(dottedname, context)} or just the parsed target with the current + context if the target is not using C{self.}. + @raises: L{InvalidName} or L{UsingSelfInUnknownContext}. 
+ """ dottedname = node2dottedname(target) if not dottedname: - return - + raise InvalidName() parent = self.builder.current if dottedname[0] == 'self': dottedname = dottedname[1:] - parent = self._getClassFromMethodContext() - if not parent: - return + maybe_parent = self._getClassFromMethodContext() + if not maybe_parent: + raise UsingSelfInUnknownContext() + parent = maybe_parent + return dottedname, parent + + def _handleInlineDocstrings(self, assign:Union[ast.Assign, ast.AnnAssign], target:ast.expr) -> None: + # Process the inline docstrings + try: + dottedname, parent = self._reduceNameInContext(target) + except (InvalidName, UsingSelfInUnknownContext): + return if len(dottedname) != 1: return diff --git a/pydoctor/astutils.py b/pydoctor/astutils.py index 5a618fead..0ef228743 100644 --- a/pydoctor/astutils.py +++ b/pydoctor/astutils.py @@ -4,7 +4,7 @@ import sys from numbers import Number -from typing import Iterator, Optional, List, Iterable, Sequence, TYPE_CHECKING, TypeVar, Union +from typing import Iterator, Optional, List, Iterable, Sequence, TYPE_CHECKING, TypeVar, Union, cast from inspect import BoundArguments, Signature import ast @@ -156,7 +156,7 @@ def is_using_annotations(expr: Optional[ast.AST], return True return False -def get_assign_docstring_node(assign:Union[ast.Assign, ast.AnnAssign]) -> Optional[ast.Str]: +def get_assign_docstring_node(assign:Union[ast.Assign, ast.AnnAssign]) -> Optional[Union[ast.Str, ast.Constant]]: """ Get the docstring for a L{ast.Assign} or L{ast.AnnAssign} node. 
@@ -179,7 +179,7 @@ def get_assign_docstring_node(assign:Union[ast.Assign, ast.AnnAssign]) -> Option return None if isinstance(right_sibling, ast.Expr) and \ get_str_value(right_sibling.value) is not None: - return right_sibling.value + return cast('Union[ast.Str, ast.Constant]', right_sibling.value) return None _AST = TypeVar('_AST', bound=ast.AST) @@ -191,11 +191,11 @@ class _Parentage(ast.NodeTransformer): # stolen from https://stackoverflow.com/a/68845448 parent: Optional[ast.AST] = None - def visit(self, node: _AST) -> _AST: + def visit(self, node: ast.AST) -> ast.AST: setattr(node, 'parent', self.parent) self.parent = node node = super().visit(node) if isinstance(node, ast.AST): self.parent = getattr(node, 'parent') return node - return _Parentage().visit(node) + return cast('_AST', _Parentage().visit(node)) diff --git a/pydoctor/model.py b/pydoctor/model.py index f159d01e1..79538c663 100644 --- a/pydoctor/model.py +++ b/pydoctor/model.py @@ -154,7 +154,7 @@ def setup(self) -> None: self.contents: Dict[str, Documentable] = {} self._linker: Optional['linker.DocstringLinker'] = None - def setDocstring(self, node: ast.Str) -> None: + def setDocstring(self, node: Union[ast.Str, ast.Constant]) -> None: doc = node.s lineno = node.lineno if _string_lineno_is_end: diff --git a/pydoctor/test/test_astutils.py b/pydoctor/test/test_astutils.py index 98b65a93e..a9a8a7441 100644 --- a/pydoctor/test/test_astutils.py +++ b/pydoctor/test/test_astutils.py @@ -3,13 +3,13 @@ def test_parentage() -> None: tree = astbuilder._parse('class f(b):...') - assert tree.body[0].parent == tree - assert tree.body[0].body[0].parent == tree.body[0] - assert tree.body[0].bases[0].parent == tree.body[0] + assert tree.body[0].parent == tree # type:ignore + assert tree.body[0].body[0].parent == tree.body[0] # type:ignore + assert tree.body[0].bases[0].parent == tree.body[0] # type:ignore def test_get_assign_docstring_node() -> None: tree = astbuilder._parse('var = 1\n\n\n"inline docs"') - assert 
astutils.get_str_value(astutils.get_assign_docstring_node(tree.body[0])) == "inline docs" + assert astutils.get_str_value(astutils.get_assign_docstring_node(tree.body[0])) == "inline docs" # type:ignore tree = astbuilder._parse('var:int = 1\n\n\n"inline docs"') - assert astutils.get_str_value(astutils.get_assign_docstring_node(tree.body[0])) == "inline docs" + assert astutils.get_str_value(astutils.get_assign_docstring_node(tree.body[0])) == "inline docs" # type:ignore From f34040d37c6a31ab952164cc5ab805e43e1e324f Mon Sep 17 00:00:00 2001 From: tristanlatr <19967168+tristanlatr@users.noreply.github.com> Date: Wed, 25 May 2022 22:53:11 -0400 Subject: [PATCH 03/40] Update pydoctor/astbuilder.py --- pydoctor/astbuilder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pydoctor/astbuilder.py b/pydoctor/astbuilder.py index 5dc104fb3..ca013f80e 100644 --- a/pydoctor/astbuilder.py +++ b/pydoctor/astbuilder.py @@ -790,7 +790,7 @@ def visit_AnnAssign(self, node: ast.AnnAssign) -> None: def _reduceNameInContext(self, target:ast.expr) -> Tuple[List[str], model.Documentable]: """ If the current context is a method, strip the C{'self.'} part of assignment names and return - the right L{Class} context in which to use the new name. The new name maybe dotted. + the right L{Class} context in which to use the new name. The new name maybe dotted. @returns: Tuple C{(dottedname, context)} or just the parsed target with the current context if the target is not using C{self.}. 
From 0d60f572bb19bf41440342af78492972bd82b219 Mon Sep 17 00:00:00 2001 From: tristanlatr Date: Thu, 20 Jun 2024 14:14:28 -0400 Subject: [PATCH 04/40] Trigger a warning when several docstrings are detected --- README.rst | 2 + pydoctor/astbuilder.py | 6 +-- pydoctor/model.py | 11 +++++ pydoctor/test/test_astbuilder.py | 76 +++++++++++++++++++++++++------- 4 files changed, 76 insertions(+), 19 deletions(-) diff --git a/README.rst b/README.rst index 2fc442f96..27745e6f1 100644 --- a/README.rst +++ b/README.rst @@ -76,6 +76,8 @@ What's New? in development ^^^^^^^^^^^^^^ +* Trigger a warning when several docstrings are detected for the same object. + pydoctor 24.3.3 ^^^^^^^^^^^^^^^ diff --git a/pydoctor/astbuilder.py b/pydoctor/astbuilder.py index f180895a6..1df74cd9e 100644 --- a/pydoctor/astbuilder.py +++ b/pydoctor/astbuilder.py @@ -717,7 +717,7 @@ def warn(msg: str) -> None: return if obj is not None: - obj.docstring = docstring + obj._setDocstringValue(docstring, expr.lineno) # TODO: It might be better to not perform docstring parsing until # we have the final docstrings for all objects. obj.parsed_docstring = None @@ -959,9 +959,7 @@ def _handlePropertyDef(self, if tag == 'return': if not pdoc.has_body: pdoc = field.body() - # Avoid format_summary() going back to the original - # empty-body docstring. 
- attr.docstring = '' + elif tag == 'rtype': attr.parsed_type = field.body() else: diff --git a/pydoctor/model.py b/pydoctor/model.py index 04466bdfe..6ffaf23cc 100644 --- a/pydoctor/model.py +++ b/pydoctor/model.py @@ -166,8 +166,19 @@ def setup(self) -> None: def setDocstring(self, node: astutils.Str) -> None: lineno, doc = astutils.extract_docstring(node) + self._setDocstringValue(doc, lineno) + + def _setDocstringValue(self, doc:str, lineno:int) -> None: + if self.docstring or self.parsed_docstring: # some object have a parsed docstring only like the ones coming from ivar fields + msg = 'Existing docstring' + if self.docstring_lineno: + msg += f' at line {self.docstring_lineno}' + msg += f' is overriden' + self.report(msg, 'docstring', lineno_offset=lineno-self.docstring_lineno) self.docstring = doc self.docstring_lineno = lineno + if self.parsed_docstring: + self.parsed_docstring = None def setLineNumber(self, lineno: LineFromDocstringField | LineFromAst | int) -> None: """ diff --git a/pydoctor/test/test_astbuilder.py b/pydoctor/test/test_astbuilder.py index 6578138d6..6d1240262 100644 --- a/pydoctor/test/test_astbuilder.py +++ b/pydoctor/test/test_astbuilder.py @@ -1114,11 +1114,11 @@ def method1(): def method2(): pass - method1.__doc__ = "Updated docstring #1" + method1.__doc__ = "Override docstring #1" fun.__doc__ = "Happy Happy Joy Joy" CLS.__doc__ = "Clears the screen" - CLS.method2.__doc__ = "Updated docstring #2" + CLS.method2.__doc__ = "Set docstring #2" None.__doc__ = "Free lunch!" 
real.__doc__ = "Second breakfast" @@ -1142,19 +1142,14 @@ def mark_unavailable(func): assert method2.kind is model.DocumentableKind.METHOD assert method2.docstring == "Updated docstring #2" captured = capsys.readouterr() - lines = captured.out.split('\n') - assert len(lines) > 0 and lines[0] == \ - ":20: Unable to figure out target for __doc__ assignment" - assert len(lines) > 1 and lines[1] == \ - ":21: Unable to figure out target for __doc__ assignment: " \ - "computed full name not found: real" - assert len(lines) > 2 and lines[2] == \ - ":22: Unable to figure out value for __doc__ assignment, " \ - "maybe too complex" - assert len(lines) > 3 and lines[3] == \ - ":23: Ignoring value assigned to __doc__: not a string" - assert len(lines) == 5 and lines[-1] == '' - + assert captured.out == ( + ':14: Existing docstring at line 8 is overriden\n' + ':20: Unable to figure out target for __doc__ assignment\n' + ':21: Unable to figure out target for __doc__ assignment: computed full name not found: real\n' + ':22: Unable to figure out value for __doc__ assignment, maybe too complex\n' + ':23: Ignoring value assigned to __doc__: not a string\n' + ) + @systemcls_param def test_docstring_assignment_detuple(systemcls: Type[model.System], capsys: CapSys) -> None: """We currently don't trace values for detupling assignments, so when @@ -2747,3 +2742,54 @@ def test_typealias_unstring(systemcls: Type[model.System]) -> None: # there is not Constant nodes in the type alias anymore next(n for n in ast.walk(typealias.value) if isinstance(n, ast.Constant)) +@systemcls_param +def test_mutilple_docstrings_warnings(systemcls: Type[model.System], capsys: CapSys) -> None: + """ + When pydoctor encounters multiple places where the docstring is defined, it reports a warning. 
+ """ + src = ''' + class C: + a: int;"docs" + def _(self): + self.a = 0; "re-docs" + + class B: + """ + @ivar a: docs + """ + a: int + "re-docs" + + class A: + """docs""" + A.__doc__ = 're-docs' + ''' + fromText(src, systemcls=systemcls) + assert capsys.readouterr().out == (':5: Existing docstring at line 3 is overriden\n' + ':12: Existing docstring at line 9 is overriden\n' + ':16: Existing docstring at line 15 is overriden\n') + +@systemcls_param +def test_mutilple_docstring_with_doc_comments_warnings(systemcls: Type[model.System], capsys: CapSys) -> None: + src = ''' + class C: + a: int;"docs" #: re-docs + + class B: + """ + @ivar a: docs + """ + #: re-docs + a: int + + class B2: + """ + @ivar a: docs + """ + #: re-docs + a: int + "re-re-docs" + ''' + fromText(src, systemcls=systemcls) + # TODO: handle doc comments.x + assert capsys.readouterr().out == ':18: Existing docstring at line 14 is overriden\n' \ No newline at end of file From 468850f8c87bfbad3446d66757b6e5f525cf307a Mon Sep 17 00:00:00 2001 From: tristanlatr Date: Thu, 20 Jun 2024 14:32:56 -0400 Subject: [PATCH 05/40] Fix pyflakes --- pydoctor/model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pydoctor/model.py b/pydoctor/model.py index 6ffaf23cc..9ef8647d8 100644 --- a/pydoctor/model.py +++ b/pydoctor/model.py @@ -173,7 +173,7 @@ def _setDocstringValue(self, doc:str, lineno:int) -> None: msg = 'Existing docstring' if self.docstring_lineno: msg += f' at line {self.docstring_lineno}' - msg += f' is overriden' + msg += ' is overriden' self.report(msg, 'docstring', lineno_offset=lineno-self.docstring_lineno) self.docstring = doc self.docstring_lineno = lineno From 0330265ccc353b9fb27fa546b5181c7828b1e000 Mon Sep 17 00:00:00 2001 From: tristanlatr Date: Thu, 20 Jun 2024 14:36:26 -0400 Subject: [PATCH 06/40] Forgot the test file --- pydoctor/test/test_astbuilder.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pydoctor/test/test_astbuilder.py 
b/pydoctor/test/test_astbuilder.py index 6d1240262..053342be4 100644 --- a/pydoctor/test/test_astbuilder.py +++ b/pydoctor/test/test_astbuilder.py @@ -1137,10 +1137,10 @@ def mark_unavailable(func): assert CLS.docstring == """Clears the screen""" method1 = CLS.contents['method1'] assert method1.kind is model.DocumentableKind.METHOD - assert method1.docstring == "Updated docstring #1" + assert method1.docstring == "Override docstring #1" method2 = CLS.contents['method2'] assert method2.kind is model.DocumentableKind.METHOD - assert method2.docstring == "Updated docstring #2" + assert method2.docstring == "Set docstring #2" captured = capsys.readouterr() assert captured.out == ( ':14: Existing docstring at line 8 is overriden\n' From a7d3aa398a5a81b4c45f0c8774eed7e9c5cdf293 Mon Sep 17 00:00:00 2001 From: tristanlatr Date: Thu, 20 Jun 2024 15:04:59 -0400 Subject: [PATCH 07/40] Add some soce from sphinx to support doc comments. --- pydoctor/astutils.py | 131 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 131 insertions(+) diff --git a/pydoctor/astutils.py b/pydoctor/astutils.py index 850414c05..3c0d84caa 100644 --- a/pydoctor/astutils.py +++ b/pydoctor/astutils.py @@ -770,3 +770,134 @@ class Precedence(object): del _op_data, _index, _precedence_data, _symbol_data, _deprecated # This was part of the astor library for Python AST manipulation. + + +# Part of the sphinx.pycode.parser module. +# Copyright 2007-2020 by the Sphinx team, see AUTHORS. +# BSD, see LICENSE for details. 
+from token import DEDENT, INDENT, NAME, NEWLINE, NUMBER, OP, STRING +from tokenize import COMMENT, generate_tokens, tok_name + +class Token: + """Better token wrapper for tokenize module.""" + + def __init__(self, kind: int, value: Any, start: Tuple[int, int], end: Tuple[int, int], + source: str) -> None: + self.kind = kind + self.value = value + self.start = start + self.end = end + self.source = source + + def __eq__(self, other: Any) -> bool: + if isinstance(other, int): + return self.kind == other + elif isinstance(other, str): + return bool(self.value == other) + elif isinstance(other, (list, tuple)): + return [self.kind, self.value] == list(other) + elif other is None: + return False + else: + raise ValueError('Unknown value: %r' % other) + + def match(self, *conditions: Any) -> bool: + return any(self == candidate for candidate in conditions) + + def __repr__(self) -> str: + return '' % (tok_name[self.kind], + self.value.strip()) + +class TokenProcessor: + def __init__(self, buffers: List[str]) -> None: + lines = iter(buffers) + self.buffers = buffers + self.tokens = generate_tokens(lambda: next(lines)) + self.current: Token | None = None + self.previous: Token | None = None + + def get_line(self, lineno: int) -> str: + """Returns specified line.""" + return self.buffers[lineno - 1] + + def fetch_token(self) -> Token | None: + """Fetch a next token from source code. + + Returns ``None`` if sequence finished. + """ + try: + self.previous = self.current + self.current = Token(*next(self.tokens)) + except StopIteration: + self.current = None + + return self.current + + def fetch_until(self, condition: Any) -> List[Token]: + """Fetch tokens until specified token appeared. + + .. note:: This also handles parenthesis well. 
+ """ + tokens = [] + while current := self.fetch_token(): + tokens.append(current) + if current == condition: + break + elif current == [OP, '(']: + tokens += self.fetch_until([OP, ')']) + elif current == [OP, '{']: + tokens += self.fetch_until([OP, '}']) + elif current == [OP, '[']: + tokens += self.fetch_until([OP, ']']) + + return tokens + + +class AfterCommentParser(TokenProcessor): + """Python source code parser to pick up comment after assignment. + + This parser takes a python code starts with assignment statement, + and returns the comments for variable if exists. + """ + + def __init__(self, lines: List[str]) -> None: + super().__init__(lines) + self.comment: str | None = None + + def fetch_rvalue(self) -> List[Token]: + """Fetch right-hand value of assignment.""" + tokens = [] + while current := self.fetch_token(): + tokens.append(current) + if current == [OP, '(']: + tokens += self.fetch_until([OP, ')']) + elif current == [OP, '{']: + tokens += self.fetch_until([OP, '}']) + elif current == [OP, '[']: + tokens += self.fetch_until([OP, ']']) + elif current == INDENT: + tokens += self.fetch_until(DEDENT) + elif current == [OP, ';']: + break + elif current.kind not in (OP, NAME, NUMBER, STRING): + break + + return tokens + + def parse(self) -> None: + """Parse the code and obtain comment after assignment.""" + # skip lvalue (or whole of AnnAssign) + while (current:=self.fetch_token()) and \ + not current.match([OP, '='], NEWLINE, COMMENT): + assert current + + if current is None: + return + + # skip rvalue (if exists) + if current == [OP, '=']: + self.fetch_rvalue() + + if current == COMMENT: + self.comment = current.value +# This was part of the sphinx.pycode.parser module. 
From 537fb051c60de2b83b9d35e9100580b178af2f85 Mon Sep 17 00:00:00 2001 From: tristanlatr <19967168+tristanlatr@users.noreply.github.com> Date: Tue, 25 Jun 2024 17:01:51 -0400 Subject: [PATCH 08/40] Add comment --- pydoctor/model.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pydoctor/model.py b/pydoctor/model.py index 9ef8647d8..425727ebb 100644 --- a/pydoctor/model.py +++ b/pydoctor/model.py @@ -177,6 +177,8 @@ def _setDocstringValue(self, doc:str, lineno:int) -> None: self.report(msg, 'docstring', lineno_offset=lineno-self.docstring_lineno) self.docstring = doc self.docstring_lineno = lineno + # Due to the current process for parsing doc strings, some objects might already have a parsed_docstring populated at this moment. + # This is an unfortunate behaviour but it’s too big of a refactor for now (see https://github.com/twisted/pydoctor/issues/798). if self.parsed_docstring: self.parsed_docstring = None From f93e09acd8f5f04d7808b6c275287638088f7468 Mon Sep 17 00:00:00 2001 From: tristanlatr Date: Wed, 26 Jun 2024 18:20:19 -0400 Subject: [PATCH 09/40] Hook the sphinx code into the astbuilder.
--- pydoctor/astbuilder.py | 51 ++++++++++++++---- pydoctor/astutils.py | 90 ++++++++++++++++++++++---------- pydoctor/test/test_astbuilder.py | 50 ++++++++++++++++++ 3 files changed, 152 insertions(+), 39 deletions(-) diff --git a/pydoctor/astbuilder.py b/pydoctor/astbuilder.py index f180895a6..deccf9420 100644 --- a/pydoctor/astbuilder.py +++ b/pydoctor/astbuilder.py @@ -15,16 +15,20 @@ from pydoctor import epydoc2stan, model, node2stan, extensions, linker from pydoctor.epydoc.markup._pyval_repr import colorize_inline_pyval -from pydoctor.astutils import (is_none_literal, is_typing_annotation, is_using_annotations, is_using_typing_final, node2dottedname, node2fullname, +from pydoctor.astutils import (is_none_literal, is_typing_annotation, is_using_annotations, is_using_typing_final, node2dottedname, node2fullname, extract_doc_comment, is__name__equals__main__, unstring_annotation, upgrade_annotation, iterassign, extract_docstring_linenum, infer_type, get_parents, get_docstring_node, unparse, NodeVisitor, Parentage, Str) -def parseFile(path: Path) -> ast.Module: - """Parse the contents of a Python source file.""" +def parseFile(path: Path) -> tuple[ast.Module, Sequence[str]]: + """ + Parse the contents of a Python source file. + + @returns: Tuple: ast module, sequence of source code lines. 
+ """ with open(path, 'rb') as f: src = f.read() + b'\n' - return _parse(src, filename=str(path)) + return _parse(src, filename=str(path)), src.splitlines(keepends=True) if sys.version_info >= (3,8): _parse = partial(ast.parse, type_comments=True) @@ -743,6 +747,24 @@ def _handleAssignment(self, self._handleDocstringUpdate(value, expr, lineno) elif isinstance(value, ast.Name) and value.id == 'self': self._handleInstanceVar(targetNode.attr, annotation, expr, lineno) + + def _handleDocComment(self, node: ast.Assign | ast.AnnAssign): + # it does not work with tuple unpacking statements or multiple names at the moment + is_assign = isinstance(node, ast.Assign) + if any(isinstance(t, ast.Tuple) for t in + (node.targets if is_assign else [node.target])) or ( + is_assign and len(node.targets) > 1): + return # should we trigger a warning if a valid doc_comment is found? + + doc_comment = extract_doc_comment(node, self.builder.lines_collection[self.module]) + if doc_comment: + linenumber, docstring = doc_comment + attr = self.builder.currentAttr + if attr is not None: + attr.docstring = docstring + attr.docstring_lineno = linenumber + # will be: attr._setDocstringValue(docstring, linenumber) + def visit_Assign(self, node: ast.Assign) -> None: lineno = node.lineno @@ -763,11 +785,13 @@ def visit_Assign(self, node: ast.Assign) -> None: self._handleAssignment(elem, None, None, lineno) else: self._handleAssignment(target, annotation, expr, lineno) + self._handleDocComment(node) def visit_AnnAssign(self, node: ast.AnnAssign) -> None: annotation = upgrade_annotation(unstring_annotation( node.annotation, self.builder.current), self.builder.current) self._handleAssignment(node.target, annotation, node.value, node.lineno) + self._handleDocComment(node) def visit_AugAssign(self, node:ast.AugAssign) -> None: self._handleAssignment(node.target, None, node.value, @@ -1068,12 +1092,13 @@ class ASTBuilder: def __init__(self, system: model.System): self.system = system - self.current = 
cast(model.Documentable, None) # current visited object - self.currentMod: Optional[model.Module] = None # module, set when visiting ast.Module - self.currentAttr: Optional[model.Documentable] = None # recently visited attribute object + self.current = cast(model.Documentable, None) #: current visited object + self.currentMod: Optional[model.Module] = None #: module, set when visiting ast.Module + self.currentAttr: Optional[model.Documentable] = None #: recently visited attribute object self._stack: List[model.Documentable] = [] - self.ast_cache: Dict[Path, Optional[ast.Module]] = {} + self.ast_cache: Dict[Path, Optional[ast.Module]] = {} #: avoids calling parse() twice for the same path + self.lines_collection: dict[model.Module, Sequence[str]] = {} #: mapping from modules to source code lines def _push(self, cls: Type[DocumentableT], name: str, lineno: int) -> DocumentableT: @@ -1179,20 +1204,24 @@ def parseFile(self, path: Path, ctx: model.Module) -> Optional[ast.Module]: return self.ast_cache[path] except KeyError: mod: Optional[ast.Module] = None + lines: Sequence[str] | None = None try: - mod = parseFile(path) + mod, lines = parseFile(path) except (SyntaxError, ValueError) as e: ctx.report(f"cannot parse file, {e}") self.ast_cache[path] = mod + self.lines_collection[ctx] = lines return mod def parseString(self, py_string:str, ctx: model.Module) -> Optional[ast.Module]: - mod = None + mod: Optional[ast.Module] = None + lines: Sequence[str] | None = None try: - mod = _parse(py_string) + mod, lines = _parse(py_string), py_string.splitlines(keepends=True) except (SyntaxError, ValueError): ctx.report("cannot parse string") + self.lines_collection[ctx] = lines return mod model.System.defaultBuilder = ASTBuilder diff --git a/pydoctor/astutils.py b/pydoctor/astutils.py index 3c0d84caa..f268e78b4 100644 --- a/pydoctor/astutils.py +++ b/pydoctor/astutils.py @@ -5,6 +5,7 @@ import inspect import platform +import re import sys from numbers import Number from typing 
import Any, Callable, Collection, Iterator, Optional, List, Iterable, Sequence, TYPE_CHECKING, Tuple, Union, cast @@ -813,14 +814,14 @@ def __init__(self, buffers: List[str]) -> None: lines = iter(buffers) self.buffers = buffers self.tokens = generate_tokens(lambda: next(lines)) - self.current: Token | None = None - self.previous: Token | None = None + self.current = None # type: Token + self.previous = None # type: Token def get_line(self, lineno: int) -> str: """Returns specified line.""" return self.buffers[lineno - 1] - def fetch_token(self) -> Token | None: + def fetch_token(self) -> Token: """Fetch a next token from source code. Returns ``None`` if sequence finished. @@ -839,15 +840,15 @@ def fetch_until(self, condition: Any) -> List[Token]: .. note:: This also handles parenthesis well. """ tokens = [] - while current := self.fetch_token(): - tokens.append(current) - if current == condition: + while self.fetch_token(): + tokens.append(self.current) + if self.current == condition: break - elif current == [OP, '(']: + elif self.current == [OP, '(']: tokens += self.fetch_until([OP, ')']) - elif current == [OP, '{']: + elif self.current == [OP, '{']: tokens += self.fetch_until([OP, '}']) - elif current == [OP, '[']: + elif self.current == [OP, '[']: tokens += self.fetch_until([OP, ']']) return tokens @@ -862,24 +863,24 @@ class AfterCommentParser(TokenProcessor): def __init__(self, lines: List[str]) -> None: super().__init__(lines) - self.comment: str | None = None + self.comment = None # type: str def fetch_rvalue(self) -> List[Token]: """Fetch right-hand value of assignment.""" tokens = [] - while current := self.fetch_token(): - tokens.append(current) - if current == [OP, '(']: + while self.fetch_token(): + tokens.append(self.current) + if self.current == [OP, '(']: tokens += self.fetch_until([OP, ')']) - elif current == [OP, '{']: + elif self.current == [OP, '{']: tokens += self.fetch_until([OP, '}']) - elif current == [OP, '[']: + elif self.current == [OP, 
'[']: tokens += self.fetch_until([OP, ']']) - elif current == INDENT: + elif self.current == INDENT: tokens += self.fetch_until(DEDENT) - elif current == [OP, ';']: + elif self.current == [OP, ';']: break - elif current.kind not in (OP, NAME, NUMBER, STRING): + elif self.current.kind not in (OP, NAME, NUMBER, STRING): break return tokens @@ -887,17 +888,50 @@ def fetch_rvalue(self) -> List[Token]: def parse(self) -> None: """Parse the code and obtain comment after assignment.""" # skip lvalue (or whole of AnnAssign) - while (current:=self.fetch_token()) and \ - not current.match([OP, '='], NEWLINE, COMMENT): - assert current - - if current is None: - return + while not self.fetch_token().match([OP, '='], NEWLINE, COMMENT): + assert self.current # skip rvalue (if exists) - if current == [OP, '=']: + if self.current == [OP, '=']: self.fetch_rvalue() - if current == COMMENT: - self.comment = current.value -# This was part of the sphinx.pycode.parser module. + if self.current == COMMENT: + self.comment = self.current.value + +comment_re = re.compile('^\\s*#: ?(.*)\r?\n?$') +indent_re = re.compile('^\\s*$') + +def extract_doc_comment(node: ast.Assign | ast.AnnAssign, lines: Sequence[str]) -> Tuple[int, str] | None: + """ + Support for doc comment as found in sphinx. + + @param node: the assignment node + @param lines: the lines of the source code, as generated by + C{code.splitlines(keepends=True)}. + @returns: A tuple linenumber, docstring or None if the assignment doesn't have a doc comment. 
+ """ + # check doc comments after assignment + current_line = lines[node.lineno - 1] + parser = AfterCommentParser([current_line[node.col_offset:]] + + lines[node.lineno:]) + parser.parse() + if parser.comment and comment_re.match(parser.comment): + docstring = comment_re.sub('\\1', parser.comment) + return node.lineno, docstring + + # check doc comments before assignment + if indent_re.match(current_line[:node.col_offset]): + comment_lines = [] + for i in range(node.lineno - 1): + before_line = lines[node.lineno - 2 - i] + if comment_re.match(before_line): + comment_lines.append(comment_re.sub('\\1', before_line)) + else: + break + if comment_lines: + docstring = inspect.cleandoc('\n'.join(reversed(comment_lines))) + return node.lineno - len(comment_lines), docstring + + return None + +# This was part of the sphinx.pycode.parser module. \ No newline at end of file diff --git a/pydoctor/test/test_astbuilder.py b/pydoctor/test/test_astbuilder.py index 6578138d6..1c7fdad8f 100644 --- a/pydoctor/test/test_astbuilder.py +++ b/pydoctor/test/test_astbuilder.py @@ -2747,3 +2747,53 @@ def test_typealias_unstring(systemcls: Type[model.System]) -> None: # there is not Constant nodes in the type alias anymore next(n for n in ast.walk(typealias.value) if isinstance(n, ast.Constant)) +@systemcls_param +def test_doc_comment(systemcls: Type[model.System], capsys: CapSys) -> None: + """ + Tests for feature https://github.com/twisted/pydoctor/issues/800 + """ + code = ('class Foo(object):\n' + ' """class Foo!"""\n' + ' #: comment before attr1\n' + ' attr1 = None\n' + ' attr2 = None # attribute comment for attr2 (without colon)\n' + ' attr3 = None #: attribute comment for attr3\n' + ' attr4 = None #: long attribute comment\n' + ' #: for attr4\n' + ' #: comment before attr5\n' + ' attr5 = None #: attribute comment for attr5\n' + ' attr6, attr7 = 1, 2 #: this comment is ignored\n' + '\n' + ' def __init__(self):\n' + ' self.attr8 = None #: first attribute comment (ignored)\n' + ' 
self.attr8 = None #: attribute comment for attr8\n' + ' #: comment before attr9\n' + ' self.attr9 = None #: comment after attr9\n' + ' "string after attr9"\n' + '\n' + ' def bar(self, arg1, arg2=True, *args, **kwargs):\n' + ' """method Foo.bar"""\n' + ' pass\n' + '\n' + 'def baz():\n' + ' """function baz"""\n' + ' pass\n' + '\n' + 'class Qux: attr1 = 1; attr2 = 2') + + mod = fromText(code, systemcls=systemcls) + + def docs(name: str) -> str: + return mod.contents['Foo'].contents[name].docstring + + assert docs('attr1') == 'comment before attr1' + assert docs('attr2') == None # not a doc comment + assert docs('attr3') == 'attribute comment for attr3' + assert docs('attr4') == 'long attribute comment' + assert docs('attr4') == 'long attribute comment' + assert docs('attr5') == 'attribute comment for attr5' + assert docs('attr6') == None #'this comment is ignored' + assert docs('attr7') == None #'this comment is ignored' + assert docs('attr8') == 'attribute comment for attr8' + assert docs('attr9') == 'string after attr9' + \ No newline at end of file From d3f343a7067a3e52504560a4b20acf4be22ca29f Mon Sep 17 00:00:00 2001 From: tristanlatr Date: Wed, 26 Jun 2024 18:45:06 -0400 Subject: [PATCH 10/40] Do not require the comment to be indented, idk why it's like this in Sphinx... Refactor for a better separation of docstring sources. And add a test.
--- pydoctor/astbuilder.py | 22 +++++++++++++--------- pydoctor/astutils.py | 29 +++++++++++++++++------------ pydoctor/test/test_astbuilder.py | 28 ++++++++++++++++++++++++++++ 3 files changed, 58 insertions(+), 21 deletions(-) diff --git a/pydoctor/astbuilder.py b/pydoctor/astbuilder.py index deccf9420..e433c2a4f 100644 --- a/pydoctor/astbuilder.py +++ b/pydoctor/astbuilder.py @@ -15,9 +15,9 @@ from pydoctor import epydoc2stan, model, node2stan, extensions, linker from pydoctor.epydoc.markup._pyval_repr import colorize_inline_pyval -from pydoctor.astutils import (is_none_literal, is_typing_annotation, is_using_annotations, is_using_typing_final, node2dottedname, node2fullname, extract_doc_comment, +from pydoctor.astutils import (is_none_literal, is_typing_annotation, is_using_annotations, is_using_typing_final, node2dottedname, node2fullname, is__name__equals__main__, unstring_annotation, upgrade_annotation, iterassign, extract_docstring_linenum, infer_type, get_parents, - get_docstring_node, unparse, NodeVisitor, Parentage, Str) + get_docstring_node, unparse, extract_doc_comment_before, extract_doc_comment_after, NodeVisitor, Parentage, Str) def parseFile(path: Path) -> tuple[ast.Module, Sequence[str]]: @@ -750,19 +750,23 @@ def _handleAssignment(self, def _handleDocComment(self, node: ast.Assign | ast.AnnAssign): # it does not work with tuple unpacking statements or multiple names at the moment + # as we cannot tell which of the variables the docstring is + # for, and how to assign the right side to the variables on the left. is_assign = isinstance(node, ast.Assign) if any(isinstance(t, ast.Tuple) for t in (node.targets if is_assign else [node.target])) or ( is_assign and len(node.targets) > 1): return # should we trigger a warning if a valid doc_comment is found? 
- doc_comment = extract_doc_comment(node, self.builder.lines_collection[self.module]) - if doc_comment: - linenumber, docstring = doc_comment - attr = self.builder.currentAttr - if attr is not None: - attr.docstring = docstring - attr.docstring_lineno = linenumber + attr = self.builder.currentAttr + if attr is None: + return + + lines = self.builder.lines_collection[self.module] + for doc_comment in [extract_doc_comment_before(node, lines), + extract_doc_comment_after(node, lines)]: + if doc_comment: + attr.docstring_lineno, attr.docstring = doc_comment # will be: attr._setDocstringValue(docstring, linenumber) diff --git a/pydoctor/astutils.py b/pydoctor/astutils.py index f268e78b4..a87f8acb3 100644 --- a/pydoctor/astutils.py +++ b/pydoctor/astutils.py @@ -901,7 +901,7 @@ def parse(self) -> None: comment_re = re.compile('^\\s*#: ?(.*)\r?\n?$') indent_re = re.compile('^\\s*$') -def extract_doc_comment(node: ast.Assign | ast.AnnAssign, lines: Sequence[str]) -> Tuple[int, str] | None: +def extract_doc_comment_after(node: ast.Assign | ast.AnnAssign, lines: Sequence[str]) -> Tuple[int, str] | None: """ Support for doc comment as found in sphinx. @@ -919,18 +919,23 @@ def extract_doc_comment(node: ast.Assign | ast.AnnAssign, lines: Sequence[str]) docstring = comment_re.sub('\\1', parser.comment) return node.lineno, docstring + return None + +def extract_doc_comment_before(node: ast.Assign | ast.AnnAssign, lines: Sequence[str]) -> Tuple[int, str] | None: + """ + Same as L{extract_doc_comment_after} but fetch the comment before the assignment. 
+ """ # check doc comments before assignment - if indent_re.match(current_line[:node.col_offset]): - comment_lines = [] - for i in range(node.lineno - 1): - before_line = lines[node.lineno - 2 - i] - if comment_re.match(before_line): - comment_lines.append(comment_re.sub('\\1', before_line)) - else: - break - if comment_lines: - docstring = inspect.cleandoc('\n'.join(reversed(comment_lines))) - return node.lineno - len(comment_lines), docstring + comment_lines = [] + for i in range(node.lineno - 1): + before_line = lines[node.lineno - 2 - i] + if comment_re.match(before_line): + comment_lines.append(comment_re.sub('\\1', before_line)) + else: + break + if comment_lines: + docstring = inspect.cleandoc('\n'.join(reversed(comment_lines))) + return node.lineno - len(comment_lines), docstring return None diff --git a/pydoctor/test/test_astbuilder.py b/pydoctor/test/test_astbuilder.py index 1c7fdad8f..203f5616b 100644 --- a/pydoctor/test/test_astbuilder.py +++ b/pydoctor/test/test_astbuilder.py @@ -2796,4 +2796,32 @@ def docs(name: str) -> str: assert docs('attr7') == None #'this comment is ignored' assert docs('attr8') == 'attribute comment for attr8' assert docs('attr9') == 'string after attr9' + +@systemcls_param +def test_doc_comment_module_var(systemcls: Type[model.System], capsys: CapSys) -> None: + src = """ + a: int = 42 #: This is a variable. + + #: This is b variable. + b = None + + #: This is c variable. + c: float #: This takes precedence! + + d: None #: This is also ignored. + '''Because I exist!''' + + #: this is not documentation + + e = 43 + """ + mod = fromText(src, systemcls=systemcls) + + def docs(name: str) -> str: + return mod.contents[name].docstring + + assert docs('a') == 'This is a variable.' + assert docs('c') == 'This takes precedence!' + assert docs('d') == 'Because I exist!' 
+ assert docs('e') is None \ No newline at end of file From c00e68c29f05f1056a30c580b3718c4dcb56cd50 Mon Sep 17 00:00:00 2001 From: tristanlatr Date: Thu, 27 Jun 2024 00:03:46 -0400 Subject: [PATCH 11/40] Fix test --- pydoctor/test/test_astbuilder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pydoctor/test/test_astbuilder.py b/pydoctor/test/test_astbuilder.py index be012e426..f165ee5df 100644 --- a/pydoctor/test/test_astbuilder.py +++ b/pydoctor/test/test_astbuilder.py @@ -2850,7 +2850,7 @@ class A: def test_mutilple_docstring_with_doc_comments_warnings(systemcls: Type[model.System], capsys: CapSys) -> None: src = ''' class C: - a: int;"docs" #: re-docs + a: int;"re-docs" #: docs class B: """ From 8252d36a1093d0a5484a7ebfb6d72a8ea81104a7 Mon Sep 17 00:00:00 2001 From: tristanlatr Date: Thu, 27 Jun 2024 00:24:58 -0400 Subject: [PATCH 12/40] Add proper sphinx license --- LICENSE.txt | 64 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/LICENSE.txt b/LICENSE.txt index 46799d31d..414de12e8 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -117,3 +117,67 @@ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. + + +Support for token processing and doc-comments has been adapted from the Sphinx project - +as well as many other docstring parsing related helpers and features. +Sphinx is licensed as follows: + + +Copyright (c) 2007-2024 by the Sphinx team (see AUTHORS file). +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +* Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ +* Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +The implementation of numpydoc docstring preprocessor +was derived from Sphinx's which itselft is partially derived +from code under the following license: + +------------------------------------------------------------------------------- + +Copyright (C) 2008 Stefan van der Walt , Pauli Virtanen + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. 
+ +THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, +INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING +IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file From 5ef6dc47b425b62401152bf9a0c39ae55bdf1376 Mon Sep 17 00:00:00 2001 From: tristanlatr Date: Thu, 27 Jun 2024 01:25:46 -0400 Subject: [PATCH 13/40] Add readme entry --- README.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.rst b/README.rst index 27745e6f1..21ea3fa99 100644 --- a/README.rst +++ b/README.rst @@ -76,6 +76,8 @@ What's New? in development ^^^^^^^^^^^^^^ +* Add support for doc-comments as found in Sphinx. Use the special comment formatting ``#:`` to start the comment instead of just ``#``. + Comments need to be either on a line of their own before the definition, or immediately after the assignment on the same line. * Trigger a warning when several docstrings are detected for the same object. 
pydoctor 24.3.3 From c4a46a18de2c11388d0cade3b4be7411a29430c6 Mon Sep 17 00:00:00 2001 From: tristanlatr Date: Thu, 27 Jun 2024 21:36:53 -0400 Subject: [PATCH 14/40] Decode source files before calling splitlines() --- pydoctor/astbuilder.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pydoctor/astbuilder.py b/pydoctor/astbuilder.py index e64ef584c..d87eb1132 100644 --- a/pydoctor/astbuilder.py +++ b/pydoctor/astbuilder.py @@ -2,6 +2,7 @@ from __future__ import annotations import ast +import re import sys from functools import partial @@ -19,6 +20,7 @@ is__name__equals__main__, unstring_annotation, upgrade_annotation, iterassign, extract_docstring_linenum, infer_type, get_parents, get_docstring_node, unparse, extract_doc_comment_before, extract_doc_comment_after, NodeVisitor, Parentage, Str) +coding_re = re.compile(b'coding[=:]\s*([-\w.]+)') def parseFile(path: Path) -> tuple[ast.Module, Sequence[str]]: """ @@ -27,8 +29,10 @@ def parseFile(path: Path) -> tuple[ast.Module, Sequence[str]]: @returns: Tuple: ast module, sequence of source code lines. 
""" with open(path, 'rb') as f: - src = f.read() + b'\n' - return _parse(src, filename=str(path)), src.splitlines(keepends=True) + bytes_src = f.read() + b'\n' + return (_parse(bytes_src, filename=str(path)), + bytes_src.decode(encoding='utf-8', + errors='replace').splitlines(keepends=True)) if sys.version_info >= (3,8): _parse = partial(ast.parse, type_comments=True) From 0177b9e170f92c02e58d187d170c89f63462c654 Mon Sep 17 00:00:00 2001 From: tristanlatr Date: Fri, 28 Jun 2024 10:53:58 -0400 Subject: [PATCH 15/40] Simpler approach --- pydoctor/astbuilder.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/pydoctor/astbuilder.py b/pydoctor/astbuilder.py index d87eb1132..45cdd3529 100644 --- a/pydoctor/astbuilder.py +++ b/pydoctor/astbuilder.py @@ -20,19 +20,16 @@ is__name__equals__main__, unstring_annotation, upgrade_annotation, iterassign, extract_docstring_linenum, infer_type, get_parents, get_docstring_node, unparse, extract_doc_comment_before, extract_doc_comment_after, NodeVisitor, Parentage, Str) -coding_re = re.compile(b'coding[=:]\s*([-\w.]+)') - def parseFile(path: Path) -> tuple[ast.Module, Sequence[str]]: """ Parse the contents of a Python source file. @returns: Tuple: ast module, sequence of source code lines. 
""" - with open(path, 'rb') as f: - bytes_src = f.read() + b'\n' - return (_parse(bytes_src, filename=str(path)), - bytes_src.decode(encoding='utf-8', - errors='replace').splitlines(keepends=True)) + # TODO: Here we are assuming the file encoding of uft-8, but source code files + # can use a different encoding: https://stackoverflow.com/a/729016 + src = path.read_text() + '\n' + return _parse(src, filename=str(path)), src.splitlines(keepends=True) if sys.version_info >= (3,8): _parse = partial(ast.parse, type_comments=True) From a5620f32588389c3087af914af9f05b9056057d4 Mon Sep 17 00:00:00 2001 From: tristanlatr Date: Fri, 28 Jun 2024 11:18:09 -0400 Subject: [PATCH 16/40] Add a test for a not-utf8 encoded file --- pydoctor/test/test_astbuilder.py | 10 ++++++++++ pydoctor/test/testpackages/coding_not_utf8/__init__.py | 0 .../test/testpackages/coding_not_utf8/ascii_coding.py | 3 +++ .../test/testpackages/coding_not_utf8/other_coding.py | 3 +++ 4 files changed, 16 insertions(+) create mode 100644 pydoctor/test/testpackages/coding_not_utf8/__init__.py create mode 100644 pydoctor/test/testpackages/coding_not_utf8/ascii_coding.py create mode 100644 pydoctor/test/testpackages/coding_not_utf8/other_coding.py diff --git a/pydoctor/test/test_astbuilder.py b/pydoctor/test/test_astbuilder.py index f165ee5df..1fdffae2b 100644 --- a/pydoctor/test/test_astbuilder.py +++ b/pydoctor/test/test_astbuilder.py @@ -2878,3 +2878,13 @@ class B2: assert mod.contents['C'].contents['a'].docstring == 're-docs' assert mod.contents['B'].contents['a'].docstring == 're-docs' assert mod.contents['B2'].contents['a'].docstring == 're-re-docs' + +@systemcls_param +def test_other_encoding(systemcls: Type[model.System], capsys: CapSys) -> None: + # Test for issue https://github.com/twisted/pydoctor/issues/805 + # We're missing support for other kind of encodings. 
+ processPackage('coding_not_utf8', + systemcls=lambda: model.System(model.Options.from_args(['-q']))) + errs = capsys.readouterr().out.splitlines() + assert len(errs) == 1 + assert errs[0].endswith("pydoctor/test/testpackages/coding_not_utf8/other_coding.py:???: cannot parse file, 'utf-8' codec can't decode byte 0xa1 in position 46: invalid start byte\n") \ No newline at end of file diff --git a/pydoctor/test/testpackages/coding_not_utf8/__init__.py b/pydoctor/test/testpackages/coding_not_utf8/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/pydoctor/test/testpackages/coding_not_utf8/ascii_coding.py b/pydoctor/test/testpackages/coding_not_utf8/ascii_coding.py new file mode 100644 index 000000000..bf9667a38 --- /dev/null +++ b/pydoctor/test/testpackages/coding_not_utf8/ascii_coding.py @@ -0,0 +1,3 @@ +# -*- coding: ascii +var = True +'HELUZ H4 - kominy.xml' \ No newline at end of file diff --git a/pydoctor/test/testpackages/coding_not_utf8/other_coding.py b/pydoctor/test/testpackages/coding_not_utf8/other_coding.py new file mode 100644 index 000000000..616b553fc --- /dev/null +++ b/pydoctor/test/testpackages/coding_not_utf8/other_coding.py @@ -0,0 +1,3 @@ +# -*- coding: cp852 +var = True +'HELUZ H4 - kom¡ny.xml' \ No newline at end of file From 5ebcfc880c7e3add739ca2a9a66e29fb0d073505 Mon Sep 17 00:00:00 2001 From: tristanlatr Date: Fri, 28 Jun 2024 11:18:43 -0400 Subject: [PATCH 17/40] Fix test --- pydoctor/test/test_astbuilder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pydoctor/test/test_astbuilder.py b/pydoctor/test/test_astbuilder.py index 1fdffae2b..72f0d40c0 100644 --- a/pydoctor/test/test_astbuilder.py +++ b/pydoctor/test/test_astbuilder.py @@ -2887,4 +2887,4 @@ def test_other_encoding(systemcls: Type[model.System], capsys: CapSys) -> None: systemcls=lambda: model.System(model.Options.from_args(['-q']))) errs = capsys.readouterr().out.splitlines() assert len(errs) == 1 - assert 
errs[0].endswith("pydoctor/test/testpackages/coding_not_utf8/other_coding.py:???: cannot parse file, 'utf-8' codec can't decode byte 0xa1 in position 46: invalid start byte\n") \ No newline at end of file + assert errs[0].endswith("pydoctor/test/testpackages/coding_not_utf8/other_coding.py:???: cannot parse file, 'utf-8' codec can't decode byte 0xa1 in position 46: invalid start byte") \ No newline at end of file From fab08ee81635724e63001db7a46205c1c6f35611 Mon Sep 17 00:00:00 2001 From: tristanlatr Date: Mon, 19 Aug 2024 22:58:09 -0400 Subject: [PATCH 18/40] Fix the aliasing/doc-comment interaction issue --- pydoctor/astbuilder.py | 4 ++++ pydoctor/test/test_astbuilder.py | 11 +++++++++++ 2 files changed, 15 insertions(+) diff --git a/pydoctor/astbuilder.py b/pydoctor/astbuilder.py index 45cdd3529..376ecaa68 100644 --- a/pydoctor/astbuilder.py +++ b/pydoctor/astbuilder.py @@ -606,6 +606,8 @@ def _handleAssignmentInModule(self, assert isinstance(module, model.Module) if not _handleAliasing(module, target, expr): self._handleModuleVar(target, annotation, expr, lineno, augassign=augassign) + else: + self.builder.currentAttr = None def _handleClassVar(self, name: str, @@ -678,6 +680,8 @@ def _handleAssignmentInClass(self, assert isinstance(cls, model.Class) if not _handleAliasing(cls, target, expr): self._handleClassVar(target, annotation, expr, lineno, augassign=augassign) + else: + self.builder.currentAttr = None def _handleDocstringUpdate(self, targetNode: ast.expr, diff --git a/pydoctor/test/test_astbuilder.py b/pydoctor/test/test_astbuilder.py index 303cec09e..9d8be726f 100644 --- a/pydoctor/test/test_astbuilder.py +++ b/pydoctor/test/test_astbuilder.py @@ -2889,3 +2889,14 @@ def test_other_encoding(systemcls: Type[model.System], capsys: CapSys) -> None: errs = capsys.readouterr().out.splitlines() assert len(errs) == 1 assert errs[0].endswith("pydoctor/test/testpackages/coding_not_utf8/other_coding.py:???: cannot parse file, 'utf-8' codec can't decode byte 0xa1 
in position 46: invalid start byte") + +@systemcls_param +def test_alias_resets_attribute_state(systemcls: Type[model.System], capsys:CapSys) -> None: + # from https://github.com/lxml/lxml/blob/a56babb0013dc46baf480f49ebd5cc1ab65bc418/src/lxml/html/builder.py + src = ''' + E = True #: Legit docstring + A = E.a #: trash1 + ABBR = E.abbr #: trash2 + ''' + mod = fromText(src, systemcls=systemcls) + assert not capsys.readouterr().out From 32181af7edb06ed652e50a58bf48b77692f7f019 Mon Sep 17 00:00:00 2001 From: tristanlatr Date: Mon, 19 Aug 2024 23:01:18 -0400 Subject: [PATCH 19/40] Auto-detect source code encoding --- pydoctor/astbuilder.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/pydoctor/astbuilder.py b/pydoctor/astbuilder.py index 376ecaa68..84f58eb2c 100644 --- a/pydoctor/astbuilder.py +++ b/pydoctor/astbuilder.py @@ -2,7 +2,7 @@ from __future__ import annotations import ast -import re +import tokenize import sys from functools import partial @@ -26,9 +26,8 @@ def parseFile(path: Path) -> tuple[ast.Module, Sequence[str]]: @returns: Tuple: ast module, sequence of source code lines. 
""" - # TODO: Here we are assuming the file encoding of uft-8, but source code files - # can use a different encoding: https://stackoverflow.com/a/729016 - src = path.read_text() + '\n' + with tokenize.open(path) as f: + src = f.read() + '\n' return _parse(src, filename=str(path)), src.splitlines(keepends=True) if sys.version_info >= (3,8): From bf165a5402f65af169522e1f2c9d20bb06a37a3e Mon Sep 17 00:00:00 2001 From: tristanlatr Date: Mon, 19 Aug 2024 23:04:07 -0400 Subject: [PATCH 20/40] Fix test --- pydoctor/test/test_astbuilder.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/pydoctor/test/test_astbuilder.py b/pydoctor/test/test_astbuilder.py index 9d8be726f..4db604cf5 100644 --- a/pydoctor/test/test_astbuilder.py +++ b/pydoctor/test/test_astbuilder.py @@ -2886,10 +2886,8 @@ def test_other_encoding(systemcls: Type[model.System], capsys: CapSys) -> None: # We're missing support for other kind of encodings. processPackage('coding_not_utf8', systemcls=lambda: systemcls(model.Options.from_args(['-q']))) - errs = capsys.readouterr().out.splitlines() - assert len(errs) == 1 - assert errs[0].endswith("pydoctor/test/testpackages/coding_not_utf8/other_coding.py:???: cannot parse file, 'utf-8' codec can't decode byte 0xa1 in position 46: invalid start byte") - + assert not capsys.readouterr().out + @systemcls_param def test_alias_resets_attribute_state(systemcls: Type[model.System], capsys:CapSys) -> None: # from https://github.com/lxml/lxml/blob/a56babb0013dc46baf480f49ebd5cc1ab65bc418/src/lxml/html/builder.py From 45ee08922ddd5664fc39537c8c48a768f7aa8610 Mon Sep 17 00:00:00 2001 From: tristanlatr Date: Mon, 19 Aug 2024 23:15:44 -0400 Subject: [PATCH 21/40] Fix test --- pydoctor/test/test_astbuilder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pydoctor/test/test_astbuilder.py b/pydoctor/test/test_astbuilder.py index 4db604cf5..885b29a8c 100644 --- a/pydoctor/test/test_astbuilder.py +++ b/pydoctor/test/test_astbuilder.py @@ 
-2820,7 +2820,7 @@ def docs(name: str) -> str: assert docs('d') == 'Because I exist!' assert docs('e') is None - +@systemcls_param def test_mutilple_docstrings_warnings(systemcls: Type[model.System], capsys: CapSys) -> None: """ When pydoctor encounters multiple places where the docstring is defined, it reports a warning. From d37df787c8b222da4c640eac12a9d55887580528 Mon Sep 17 00:00:00 2001 From: tristanlatr Date: Thu, 12 Sep 2024 20:32:47 -0400 Subject: [PATCH 22/40] Add clear documentation regarding the doc-comments. --- docs/source/codedoc.rst | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/docs/source/codedoc.rst b/docs/source/codedoc.rst index 06fa4325d..152a9a66c 100644 --- a/docs/source/codedoc.rst +++ b/docs/source/codedoc.rst @@ -83,6 +83,28 @@ Assignments to ``__doc__`` inside functions are ignored by pydoctor. This can be Augmented assignments like ``+=`` are currently ignored as well, but that is an implementation limitation rather than a design decision, so this might change in the future. +Doc-comments +------------ + +Documentation can also be put into a comment with special formatting, using a ``#:`` to start the comment instead of just ``#``. +Comments need to be either on their own before the definition, OR immediately after the assignment on the same line. +The latter form is restricted to one line only.:: + + var = True #: Doc comment for module attribute. + + class Foo: + + #: Doc comment for class attribute Foo.bar. + #: It can have multiple lines. + #: @type: int + bar = 1 + + flox = 1.5 #: Doc comment for Foo.flox. One line only. + + def __init__(self): + #: Doc comment for instance attribute qux. + self.qux = 3 + Constants --------- From 43f12bbfe2ce5dd6030a380b88737c6a4ecac116 Mon Sep 17 00:00:00 2001 From: tristanlatr Date: Fri, 13 Sep 2024 06:51:26 -0400 Subject: [PATCH 23/40] Fix a couple of mypy errors... 
--- pydoctor/astbuilder.py | 21 +++++++++++---------- pydoctor/test/test_astbuilder.py | 4 ++-- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/pydoctor/astbuilder.py b/pydoctor/astbuilder.py index 84f58eb2c..f65a62621 100644 --- a/pydoctor/astbuilder.py +++ b/pydoctor/astbuilder.py @@ -755,11 +755,11 @@ def _handleAssignment(self, def _handleDocComment(self, node: ast.Assign | ast.AnnAssign): # it does not work with tuple unpacking statements or multiple names at the moment # as we cannot tell which of the variables the docstring is - # for, and how to assign the right side to the variables on the left. - is_assign = isinstance(node, ast.Assign) - if any(isinstance(t, ast.Tuple) for t in - (node.targets if is_assign else [node.target])) or ( - is_assign and len(node.targets) > 1): + # for, and how to assign the right side to the variables on the left.... + # Sphinx on the opposite will assigne the same docstring to all variables detected on the left hand side. + # We need PR #585 before merging this feature! + targets = node.targets if isinstance(node, ast.Assign) else [node.target] + if any(isinstance(t, ast.Tuple) for t in targets) or len(node.targets) > 1: return # should we trigger a warning if a valid doc_comment is found? 
attr = self.builder.currentAttr @@ -767,10 +767,11 @@ def _handleDocComment(self, node: ast.Assign | ast.AnnAssign): return lines = self.builder.lines_collection[self.module] - for doc_comment in [extract_doc_comment_before(node, lines), - extract_doc_comment_after(node, lines)]: - if doc_comment: - attr._setDocstringValue(doc_comment[1], doc_comment[0]) + if lines: + for doc_comment in [extract_doc_comment_before(node, lines), + extract_doc_comment_after(node, lines)]: + if doc_comment: + attr._setDocstringValue(doc_comment[1], doc_comment[0]) def visit_Assign(self, node: ast.Assign) -> None: @@ -1103,7 +1104,7 @@ def __init__(self, system: model.System): self._stack: List[model.Documentable] = [] self.ast_cache: Dict[Path, Optional[ast.Module]] = {} #: avoids calling parse() twice for the same path - self.lines_collection: dict[model.Module, Sequence[str]] = {} #: mapping from modules to source code lines + self.lines_collection: dict[model.Module, Sequence[str] | None] = {} #: mapping from modules to source code lines def _push(self, cls: Type[DocumentableT], name: str, lineno: int) -> DocumentableT: diff --git a/pydoctor/test/test_astbuilder.py b/pydoctor/test/test_astbuilder.py index 885b29a8c..57cac3222 100644 --- a/pydoctor/test/test_astbuilder.py +++ b/pydoctor/test/test_astbuilder.py @@ -2778,7 +2778,7 @@ def test_doc_comment(systemcls: Type[model.System], capsys: CapSys) -> None: mod = fromText(code, systemcls=systemcls) - def docs(name: str) -> str: + def docs(name: str) -> str | None: return mod.contents['Foo'].contents[name].docstring assert docs('attr1') == 'comment before attr1' @@ -2812,7 +2812,7 @@ def test_doc_comment_module_var(systemcls: Type[model.System], capsys: CapSys) """ mod = fromText(src, systemcls=systemcls) - def docs(name: str) -> str: + def docs(name: str) -> str | None: return mod.contents[name].docstring assert docs('a') == 'This is a variable.' 
From c288a96635ef110b7c12a8881e98c77d3f38118e Mon Sep 17 00:00:00 2001 From: tristanlatr Date: Fri, 13 Sep 2024 07:49:47 -0400 Subject: [PATCH 24/40] cleanup --- pydoctor/astbuilder.py | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) diff --git a/pydoctor/astbuilder.py b/pydoctor/astbuilder.py index 9df5fee47..0b9f14cb8 100644 --- a/pydoctor/astbuilder.py +++ b/pydoctor/astbuilder.py @@ -545,12 +545,6 @@ def _storeAttrValue(obj:model.Attribute, new_value:Optional[ast.expr], else: obj.value = new_value - def _storeCurrentAttr(self, obj:model.Attribute, - augassign:Optional[object]=None) -> None: - if not augassign: - self.builder.currentAttr = obj - else: - self.builder.currentAttr = None def _handleModuleVar(self, target: str, @@ -595,7 +589,6 @@ def _handleModuleVar(self, self._handleConstant(obj, annotation, expr, lineno, model.DocumentableKind.VARIABLE) self._storeAttrValue(obj, expr, augassign) - self._storeCurrentAttr(obj, augassign) def _handleAssignmentInModule(self, target: str, @@ -640,7 +633,7 @@ def _handleClassVar(self, self._handleConstant(obj, annotation, expr, lineno, model.DocumentableKind.CLASS_VARIABLE) self._storeAttrValue(obj, expr, augassign) - self._storeCurrentAttr(obj, augassign) + def _handleInstanceVar(self, name: str, @@ -665,7 +658,6 @@ def _handleInstanceVar(self, # undonditionnaly set the kind to ivar obj.kind = model.DocumentableKind.INSTANCE_VARIABLE self._storeAttrValue(obj, expr) - self._storeCurrentAttr(obj) def _handleAssignmentInClass(self, target: str, @@ -840,13 +832,6 @@ def visit_AugAssign(self, node:ast.AugAssign) -> None: self._handleAssignment(node.target, None, node.value, node.lineno, augassign=node.op) - def visit_Expr(self, node: ast.Expr) -> None: - value = node.value - if isinstance(value, Str): - attr = self.builder.currentAttr - if attr is not None: - attr.setDocstring(value) - self.builder.currentAttr = None def visit_Expr(self, node: ast.Expr) -> None: # Visit's ast.Expr.value with the 
visitor, used by extensions to visit top-level calls. From c44b3c8753fc3a31c10177a547520a7622eadfa4 Mon Sep 17 00:00:00 2001 From: tristanlatr Date: Fri, 13 Sep 2024 16:11:45 -0400 Subject: [PATCH 25/40] Use a control flow exception in order to control whether to look for docstring assignments or ignore it. --- pydoctor/astbuilder.py | 75 ++++++++++++++++---------------- pydoctor/test/test_astbuilder.py | 23 +++++++++- 2 files changed, 60 insertions(+), 38 deletions(-) diff --git a/pydoctor/astbuilder.py b/pydoctor/astbuilder.py index 0b9f14cb8..bc9974c2b 100644 --- a/pydoctor/astbuilder.py +++ b/pydoctor/astbuilder.py @@ -42,12 +42,10 @@ def _maybeAttribute(cls: model.Class, name: str) -> bool: obj = cls.find(name) return obj is None or isinstance(obj, model.Attribute) -class SkipInlineDocstring(Exception): - ... -class NotANewBinding(Exception): - ... -class UsingSelfInUnknownContext(Exception): - ... +class IgnoreAssignment(Exception): + """ + A control flow exception meaning that the assignment should not be further proccessed. + """ def _handleAliasing( ctx: model.CanContainImportsDocumentable, @@ -556,7 +554,7 @@ def _handleModuleVar(self, if target in MODULE_VARIABLES_META_PARSERS: # This is metadata, not a variable that needs to be documented, # and therefore doesn't need an Attribute instance. - return + raise IgnoreAssignment() parent = self.builder.current obj = parent.contents.get(target) if obj is None: @@ -580,7 +578,7 @@ def _handleModuleVar(self, # TODO: Should we report a warning? 
if not isinstance(obj, model.Attribute): - raise SkipInlineDocstring() + raise IgnoreAssignment() self._setAttributeAnnotation(obj, annotation) @@ -601,6 +599,8 @@ def _handleAssignmentInModule(self, assert isinstance(module, model.Module) if not _handleAliasing(module, target, expr): self._handleModuleVar(target, annotation, expr, lineno, augassign=augassign) + else: + raise IgnoreAssignment() def _handleClassVar(self, name: str, @@ -613,7 +613,7 @@ def _handleClassVar(self, cls = self.builder.current assert isinstance(cls, model.Class) if not _maybeAttribute(cls, name): - raise SkipInlineDocstring() + raise IgnoreAssignment() # Class variables can only be Attribute, so it's OK to cast obj = cast(Optional[model.Attribute], cls.contents.get(name)) @@ -641,11 +641,10 @@ def _handleInstanceVar(self, expr: Optional[ast.expr], lineno: int ) -> None: - cls = self._getClassFromMethodContext() - if not cls: - return + if not (cls:=self._getClassFromMethodContext()): + raise IgnoreAssignment() if not _maybeAttribute(cls, name): - raise SkipInlineDocstring() + raise IgnoreAssignment() # Class variables can only be Attribute, so it's OK to cast because we used _maybeAttribute() above. 
obj = cast(Optional[model.Attribute], cls.contents.get(name)) @@ -670,6 +669,8 @@ def _handleAssignmentInClass(self, assert isinstance(cls, model.Class) if not _handleAliasing(cls, target, expr): self._handleClassVar(target, annotation, expr, lineno, augassign=augassign) + else: + raise IgnoreAssignment() def _handleDocstringUpdate(self, targetNode: ast.expr, @@ -738,6 +739,7 @@ def _handleAssignment(self, value = targetNode.value if targetNode.attr == '__doc__': self._handleDocstringUpdate(value, expr, lineno) + raise IgnoreAssignment() elif isinstance(value, ast.Name) and value.id == 'self': self._handleInstanceVar(targetNode.attr, annotation, expr, lineno) @@ -761,7 +763,7 @@ def visit_Assign(self, node: ast.Assign) -> None: self._handleAssignment(elem, None, None, lineno) else: self._handleAssignment(target, annotation, expr, lineno) - except SkipInlineDocstring: + except IgnoreAssignment: continue else: if not isTupleAssignment: @@ -775,7 +777,7 @@ def visit_AnnAssign(self, node: ast.AnnAssign) -> None: node.annotation, self.builder.current), self.builder.current) try: self._handleAssignment(node.target, annotation, node.value, node.lineno) - except SkipInlineDocstring: + except IgnoreAssignment: return else: self._handleInlineDocstrings(node, node.target) @@ -789,44 +791,43 @@ def _getClassFromMethodContext(self) -> Optional[model.Class]: return None return cls - def _contextualizeTarget(self, target:ast.expr) -> Tuple[List[str], model.Documentable]: + def _contextualizeTarget(self, target:ast.expr) -> Tuple[model.Documentable, str]: """ - If the current context is a method, strip the C{'self.'} part of assignment names and return - the right L{Class} context in which to use the new name. The new name maybe dotted. + Find out the documentatble wich is the parent of the assignment's target as well as it's name. - @returns: Tuple C{(dottedname, context)}. - @raises: L{NotANewBinding} or L{UsingSelfInUnknownContext}. + @returns: Tuple C{parent, name}. 
+ @raises: L{ValueError} if the target does not bind a new variable. """ dottedname = node2dottedname(target) - if dottedname is None or len(dottedname) >= 3: - raise NotANewBinding() - parent = self.builder.current - if dottedname[0] == 'self': + if not dottedname or len(dottedname) > 2: + raise ValueError() + if len(dottedname) == 2 and dottedname[0] == 'self': + # an instance variable. + # TODO: This currently only works if the first argument of methods + # is named 'self'. + if (maybe_cls:=self._getClassFromMethodContext()) is None: + raise ValueError() dottedname = dottedname[1:] - maybe_parent = self._getClassFromMethodContext() - if not maybe_parent: - raise UsingSelfInUnknownContext() - parent = maybe_parent - return dottedname, parent + parent = maybe_cls + elif len(dottedname) != 1: + raise ValueError() + else: + parent = self.builder.current + return parent, dottedname[0] def _handleInlineDocstrings(self, assign:Union[ast.Assign, ast.AnnAssign], target:ast.expr) -> None: # Process the inline docstrings try: - dottedname, parent = self._contextualizeTarget(target) - except (NotANewBinding, UsingSelfInUnknownContext): + parent, name = self._contextualizeTarget(target) + except ValueError: return - if len(dottedname) != 1: - return - docstring_node = get_assign_docstring_node(assign) if docstring_node: - # fetch the target of the inline docstring - attr = parent.contents.get(dottedname[0]) + attr = parent.contents.get(name) if attr: attr.setDocstring(docstring_node) - def visit_AugAssign(self, node:ast.AugAssign) -> None: self._handleAssignment(node.target, None, node.value, diff --git a/pydoctor/test/test_astbuilder.py b/pydoctor/test/test_astbuilder.py index 053342be4..085022e62 100644 --- a/pydoctor/test/test_astbuilder.py +++ b/pydoctor/test/test_astbuilder.py @@ -2792,4 +2792,25 @@ class B2: ''' fromText(src, systemcls=systemcls) # TODO: handle doc comments.x - assert capsys.readouterr().out == ':18: Existing docstring at line 14 is overriden\n' \ No 
newline at end of file + assert capsys.readouterr().out == ':18: Existing docstring at line 14 is overriden\n' + +@systemcls_param +def test_inline_docstring_multiple_assigments(systemcls: Type[model.System], capsys: CapSys) -> None: + # TODO: this currently does not support nested tuple assignments. + src = ''' + class C: + def __init__(self): + self.x, x = 1, 1; 'x docs' + self.y = x = 1; 'y docs' + x,y = 1,1; 'x and y docs' + v = w = 1; 'v and w docs' + ''' + mod = fromText(src, systemcls=systemcls) + assert not capsys.readouterr().out + assert mod.contents['x'].docstring == 'x and y docs' + assert mod.contents['y'].docstring == 'x and y docs' + assert mod.contents['v'].docstring == 'v and w docs' + assert mod.contents['w'].docstring == 'v and w docs' + assert mod.contents['C'].contents['x'].docstring == 'x docs' + assert mod.contents['C'].contents['y'].docstring == 'y docs' + From daf7f637706bb528149599217165a9711a33c342 Mon Sep 17 00:00:00 2001 From: tristanlatr Date: Fri, 13 Sep 2024 18:56:48 -0400 Subject: [PATCH 26/40] Fix unhandled control flow error --- pydoctor/astbuilder.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/pydoctor/astbuilder.py b/pydoctor/astbuilder.py index bc9974c2b..1c9840ed5 100644 --- a/pydoctor/astbuilder.py +++ b/pydoctor/astbuilder.py @@ -830,8 +830,11 @@ def _handleInlineDocstrings(self, assign:Union[ast.Assign, ast.AnnAssign], targe attr.setDocstring(docstring_node) def visit_AugAssign(self, node:ast.AugAssign) -> None: - self._handleAssignment(node.target, None, node.value, - node.lineno, augassign=node.op) + try: + self._handleAssignment(node.target, None, node.value, + node.lineno, augassign=node.op) + except IgnoreAssignment: + pass def visit_Expr(self, node: ast.Expr) -> None: From e7d782659f49a3dbf72d5d8aed5dc87537853f6b Mon Sep 17 00:00:00 2001 From: tristanlatr Date: Fri, 13 Sep 2024 18:57:02 -0400 Subject: [PATCH 27/40] add few tests --- pydoctor/test/test_astbuilder.py | 25 
+++++++++++++++++++++++ pydoctor/test/test_epydoc2stan.py | 34 ++++++++++++++++++++++++++++++- 2 files changed, 58 insertions(+), 1 deletion(-) diff --git a/pydoctor/test/test_astbuilder.py b/pydoctor/test/test_astbuilder.py index 085022e62..1594daa54 100644 --- a/pydoctor/test/test_astbuilder.py +++ b/pydoctor/test/test_astbuilder.py @@ -2814,3 +2814,28 @@ def __init__(self): assert mod.contents['C'].contents['x'].docstring == 'x docs' assert mod.contents['C'].contents['y'].docstring == 'y docs' + +@systemcls_param +def test_does_not_misinterpret_string_as_documentation(systemcls: Type[model.System], capsys: CapSys) -> None: + # exmaple from numpy/distutils/ccompiler_opt.py + src = ''' + __docformat__ = 'numpy' + class C: + """ + Attributes + ---------- + cc_noopt : bool + docs + """ + def __init__(self): + self.cc_noopt = True + + if True: + """ + this is not documentation + """ + ''' + + mod = fromText(src, systemcls=systemcls) + assert not capsys.readouterr().out + assert mod.contents['C'].contents['cc_noopt'].docstring == 'docs' \ No newline at end of file diff --git a/pydoctor/test/test_epydoc2stan.py b/pydoctor/test/test_epydoc2stan.py index b35a57062..3610c9a48 100644 --- a/pydoctor/test/test_epydoc2stan.py +++ b/pydoctor/test/test_epydoc2stan.py @@ -2162,4 +2162,36 @@ def create_repository(self) -> repository.Repository: mod = fromText(code, ) docstring2html(mod.contents['Settings']) captured = capsys.readouterr().out - assert captured == ':15: Cannot find link target for "TypeError"\n' \ No newline at end of file + assert captured == ':15: Cannot find link target for "TypeError"\n' + +def test_does_not_loose_type_linenumber(capsys: CapSys) -> None: + # exmaple from numpy/distutils/ccompiler_opt.py + src = ''' + class C: + """ + Some docs bla + bla + bla + bla + + @ivar one: trash + @type cc_noopt: L{bool} + @ivar cc_noopt: docs + """ + def __init__(self): + self.cc_noopt = True + """ + docs again + """ + ''' + + system = 
model.System(model.Options.from_args('-q')) + mod = fromText(src, system=system) + assert mod.contents['C'].contents['cc_noopt'].docstring == 'docs again' + + from pydoctor.test.test_templatewriter import getHTMLOf + # we use this function as a shortcut to trigger + # the link not found warnings. + getHTMLOf(mod.contents['C']) + assert capsys.readouterr().out == (':16: Existing docstring at line 10 is overriden\n' + ':10: Cannot find link target for "bool"\n') \ No newline at end of file From 674ce69f76758a4cfbbe004c232c92abc9a45b7c Mon Sep 17 00:00:00 2001 From: tristanlatr Date: Fri, 13 Sep 2024 19:05:59 -0400 Subject: [PATCH 28/40] Fix typing. --- pydoctor/astbuilder.py | 3 ++- pydoctor/astutils.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/pydoctor/astbuilder.py b/pydoctor/astbuilder.py index 1c9840ed5..f325df725 100644 --- a/pydoctor/astbuilder.py +++ b/pydoctor/astbuilder.py @@ -769,7 +769,7 @@ def visit_Assign(self, node: ast.Assign) -> None: if not isTupleAssignment: self._handleInlineDocstrings(node, target) else: - for elem in target.elts: + for elem in cast(ast.Tuple, target).elts: # mypy is not as smart as pyright yet. self._handleInlineDocstrings(node, elem) def visit_AnnAssign(self, node: ast.AnnAssign) -> None: @@ -801,6 +801,7 @@ def _contextualizeTarget(self, target:ast.expr) -> Tuple[model.Documentable, str dottedname = node2dottedname(target) if not dottedname or len(dottedname) > 2: raise ValueError() + parent: model.Documentable if len(dottedname) == 2 and dottedname[0] == 'self': # an instance variable. 
# TODO: This currently only works if the first argument of methods diff --git a/pydoctor/astutils.py b/pydoctor/astutils.py index 78642a5ef..a4c900fd6 100644 --- a/pydoctor/astutils.py +++ b/pydoctor/astutils.py @@ -216,7 +216,7 @@ def is_using_annotations(expr: Optional[ast.AST], return True return False -def get_assign_docstring_node(assign:ast.Assig | ast.AnnAssign) -> Str | None: +def get_assign_docstring_node(assign:ast.Assign | ast.AnnAssign) -> Str | None: """ Get the docstring for a L{ast.Assign} or L{ast.AnnAssign} node. From a38c793cfc8eeddbdba469661f09b53c3123bbc0 Mon Sep 17 00:00:00 2001 From: tristanlatr Date: Sat, 14 Sep 2024 16:06:20 -0400 Subject: [PATCH 29/40] Fix docstring and add indicative exception messages --- pydoctor/astbuilder.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pydoctor/astbuilder.py b/pydoctor/astbuilder.py index f325df725..cb75b210d 100644 --- a/pydoctor/astbuilder.py +++ b/pydoctor/astbuilder.py @@ -796,22 +796,22 @@ def _contextualizeTarget(self, target:ast.expr) -> Tuple[model.Documentable, str Find out the documentatble wich is the parent of the assignment's target as well as it's name. @returns: Tuple C{parent, name}. - @raises: L{ValueError} if the target does not bind a new variable. + @raises ValueError: if the target does not bind a new variable. """ dottedname = node2dottedname(target) if not dottedname or len(dottedname) > 2: - raise ValueError() + raise ValueError('does not bind a new variable') parent: model.Documentable if len(dottedname) == 2 and dottedname[0] == 'self': # an instance variable. # TODO: This currently only works if the first argument of methods # is named 'self'. 
if (maybe_cls:=self._getClassFromMethodContext()) is None: - raise ValueError() + raise ValueError('using self in unsupported context') dottedname = dottedname[1:] parent = maybe_cls elif len(dottedname) != 1: - raise ValueError() + raise ValueError('does not bind a new variable') else: parent = self.builder.current return parent, dottedname[0] From b9e7ed2ec6384bf128cbca80610557352ce3e6bd Mon Sep 17 00:00:00 2001 From: tristanlatr Date: Sat, 14 Sep 2024 18:12:36 -0400 Subject: [PATCH 30/40] Fix the test case --- pydoctor/test/test_astbuilder.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pydoctor/test/test_astbuilder.py b/pydoctor/test/test_astbuilder.py index 1594daa54..f168ddf52 100644 --- a/pydoctor/test/test_astbuilder.py +++ b/pydoctor/test/test_astbuilder.py @@ -8,7 +8,7 @@ from pydoctor.options import Options from pydoctor.stanutils import flatten, html2stan, flatten_text from pydoctor.epydoc.markup.epytext import Element, ParsedEpytextDocstring -from pydoctor.epydoc2stan import format_summary, get_parsed_type +from pydoctor.epydoc2stan import _get_docformat, format_summary, get_parsed_type from pydoctor.test.test_packages import processPackage from pydoctor.utils import partialclass @@ -2828,7 +2828,7 @@ class C: docs """ def __init__(self): - self.cc_noopt = True + self.cc_noopt = x if True: """ @@ -2837,5 +2837,6 @@ def __init__(self): ''' mod = fromText(src, systemcls=systemcls) + assert _get_docformat(mod) == 'numpy' assert not capsys.readouterr().out - assert mod.contents['C'].contents['cc_noopt'].docstring == 'docs' \ No newline at end of file + assert to_html(mod.contents['C'].contents['cc_noopt'].parsed_docstring) == 'docs' \ No newline at end of file From 54cdd9b8fe9cc8d3622e8a8395321d54f89a865b Mon Sep 17 00:00:00 2001 From: tristanlatr Date: Sat, 14 Sep 2024 18:19:02 -0400 Subject: [PATCH 31/40] fix typing --- pydoctor/test/test_astbuilder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/pydoctor/test/test_astbuilder.py b/pydoctor/test/test_astbuilder.py index f168ddf52..42b49ce14 100644 --- a/pydoctor/test/test_astbuilder.py +++ b/pydoctor/test/test_astbuilder.py @@ -2839,4 +2839,4 @@ def __init__(self): mod = fromText(src, systemcls=systemcls) assert _get_docformat(mod) == 'numpy' assert not capsys.readouterr().out - assert to_html(mod.contents['C'].contents['cc_noopt'].parsed_docstring) == 'docs' \ No newline at end of file + assert to_html(mod.contents['C'].contents['cc_noopt'].parsed_docstring) == 'docs' #type:ignore \ No newline at end of file From 8c60beb0614b38d6f90d84a077f22c9ee1365af0 Mon Sep 17 00:00:00 2001 From: tristanlatr Date: Sun, 15 Sep 2024 07:15:05 -0400 Subject: [PATCH 32/40] Add few test cases in order to cover all new lines of code. --- pydoctor/test/test_astbuilder.py | 46 +++++++++++++++++++++++++++++++- 1 file changed, 45 insertions(+), 1 deletion(-) diff --git a/pydoctor/test/test_astbuilder.py b/pydoctor/test/test_astbuilder.py index 42b49ce14..a33ad0d7c 100644 --- a/pydoctor/test/test_astbuilder.py +++ b/pydoctor/test/test_astbuilder.py @@ -2839,4 +2839,48 @@ def __init__(self): mod = fromText(src, systemcls=systemcls) assert _get_docformat(mod) == 'numpy' assert not capsys.readouterr().out - assert to_html(mod.contents['C'].contents['cc_noopt'].parsed_docstring) == 'docs' #type:ignore \ No newline at end of file + assert mod.contents['C'].contents['cc_noopt'].docstring is None + # The docstring is None... this is the sad side effect of processing ivar fields :/ + + assert to_html(mod.contents['C'].contents['cc_noopt'].parsed_docstring) == 'docs' #type:ignore + +@systemcls_param +def test_unsupported_usage_of_self(systemcls: Type[model.System], capsys: CapSys) -> None: + src = ''' + class C: + ... 
+ def C_init(self): + self.x = True + self.y += False # erroneous usage of augassign + C.__init__ = C_init + + self = object() + self.x = False + """ + not documentation + """ + ''' + mod = fromText(src, systemcls=systemcls) + assert not capsys.readouterr().out + assert list(mod.contents['C'].contents) == [] + assert not mod.contents['self'].docstring + +@systemcls_param +def test_inline_docstring_at_wrong_place(systemcls: Type[model.System], capsys: CapSys) -> None: + src = ''' + a = objetc() + a.b = True + """ + not documentation + """ + b = object() + b.x: bool = False + """ + still not documentation + """ + ''' + mod = fromText(src, systemcls=systemcls) + assert not capsys.readouterr().out + assert list(mod.contents) == ['a', 'b'] + assert not mod.contents['a'].docstring + assert not mod.contents['b'].docstring \ No newline at end of file From 53c4e70eeed3fe0df339f44d8fe65480532c5ca1 Mon Sep 17 00:00:00 2001 From: tristanlatr Date: Sun, 15 Sep 2024 08:25:54 -0400 Subject: [PATCH 33/40] Try to cover all branches... --- pydoctor/astbuilder.py | 5 +++++ pydoctor/test/test_astbuilder.py | 26 ++++++++++++++++++++++---- 2 files changed, 27 insertions(+), 4 deletions(-) diff --git a/pydoctor/astbuilder.py b/pydoctor/astbuilder.py index cb75b210d..a0c3d65f8 100644 --- a/pydoctor/astbuilder.py +++ b/pydoctor/astbuilder.py @@ -727,6 +727,9 @@ def _handleAssignment(self, lineno: int, augassign:Optional[ast.operator]=None, ) -> None: + """ + @raises IgnoreAssignment: If the assignemnt should not be further processed. 
+ """ if isinstance(targetNode, ast.Name): target = targetNode.id scope = self.builder.current @@ -742,6 +745,8 @@ def _handleAssignment(self, raise IgnoreAssignment() elif isinstance(value, ast.Name) and value.id == 'self': self._handleInstanceVar(targetNode.attr, annotation, expr, lineno) + else: + raise IgnoreAssignment() def visit_Assign(self, node: ast.Assign) -> None: lineno = node.lineno diff --git a/pydoctor/test/test_astbuilder.py b/pydoctor/test/test_astbuilder.py index a33ad0d7c..e456eae5f 100644 --- a/pydoctor/test/test_astbuilder.py +++ b/pydoctor/test/test_astbuilder.py @@ -2850,8 +2850,8 @@ def test_unsupported_usage_of_self(systemcls: Type[model.System], capsys: CapSys class C: ... def C_init(self): - self.x = True - self.y += False # erroneous usage of augassign + self.x = True; 'not documentation' + self.y += False # erroneous usage of augassign; 'not documentation' C.__init__ = C_init self = object() @@ -2878,9 +2878,27 @@ def test_inline_docstring_at_wrong_place(systemcls: Type[model.System], capsys: """ still not documentation """ + c = {} + c[1] = True + """ + Again not documenatation + """ + d = {} + d[1].__init__ = True + """ + Again not documenatation + """ + e = {} + e[1].__init__ += True + """ + Again not documenatation + """ ''' mod = fromText(src, systemcls=systemcls) assert not capsys.readouterr().out - assert list(mod.contents) == ['a', 'b'] + assert list(mod.contents) == ['a', 'b', 'c', 'd', 'e'] assert not mod.contents['a'].docstring - assert not mod.contents['b'].docstring \ No newline at end of file + assert not mod.contents['b'].docstring + assert not mod.contents['c'].docstring + assert not mod.contents['d'].docstring + assert not mod.contents['e'].docstring \ No newline at end of file From 4613459674f810c9932140958362f79a16c284d4 Mon Sep 17 00:00:00 2001 From: tristanlatr <19967168+tristanlatr@users.noreply.github.com> Date: Sun, 15 Sep 2024 15:58:20 -0400 Subject: [PATCH 34/40] Remove comment --- pydoctor/astbuilder.py | 1 - 1 
file changed, 1 deletion(-) diff --git a/pydoctor/astbuilder.py b/pydoctor/astbuilder.py index a0c3d65f8..b1a75a177 100644 --- a/pydoctor/astbuilder.py +++ b/pydoctor/astbuilder.py @@ -575,7 +575,6 @@ def _handleModuleVar(self, # We don't know how to handle this, # so we ignore it to document the original object. This means that we might document arguments # that are in reality not existing because they have values in a partial() call for instance. - # TODO: Should we report a warning? if not isinstance(obj, model.Attribute): raise IgnoreAssignment() From 87ff39c00b4f26dd3a20737271bac6181da83ac9 Mon Sep 17 00:00:00 2001 From: tristanlatr Date: Mon, 16 Sep 2024 07:33:21 -0400 Subject: [PATCH 35/40] Fix crash --- pydoctor/astbuilder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pydoctor/astbuilder.py b/pydoctor/astbuilder.py index f65a62621..53dd47ff7 100644 --- a/pydoctor/astbuilder.py +++ b/pydoctor/astbuilder.py @@ -759,7 +759,7 @@ def _handleDocComment(self, node: ast.Assign | ast.AnnAssign): # Sphinx on the opposite will assigne the same docstring to all variables detected on the left hand side. # We need PR #585 before merging this feature! targets = node.targets if isinstance(node, ast.Assign) else [node.target] - if any(isinstance(t, ast.Tuple) for t in targets) or len(node.targets) > 1: + if any(isinstance(t, ast.Tuple) for t in targets) or len(targets) > 1: return # should we trigger a warning if a valid doc_comment is found? 
attr = self.builder.currentAttr From 415c7d9e1eedf51c86292d81bcfdf67da69cf455 Mon Sep 17 00:00:00 2001 From: tristanlatr Date: Mon, 16 Sep 2024 09:38:48 -0400 Subject: [PATCH 36/40] Remove duplicate docs --- pydoctor/epydoc/markup/restructuredtext.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/pydoctor/epydoc/markup/restructuredtext.py b/pydoctor/epydoc/markup/restructuredtext.py index 36e2cf796..753a21a0b 100644 --- a/pydoctor/epydoc/markup/restructuredtext.py +++ b/pydoctor/epydoc/markup/restructuredtext.py @@ -58,9 +58,6 @@ from pydoctor.epydoc.docutils import new_document from pydoctor.model import Documentable -#: A dictionary whose keys are the "consolidated fields" that are -#: recognized by epydoc; and whose values are the corresponding epydoc -#: field names that should be used for the individual fields. CONSOLIDATED_FIELDS = { 'parameters': 'param', 'arguments': 'arg', From 28f01e76ef4d20b8c6b422247830e7dd9a227d3d Mon Sep 17 00:00:00 2001 From: tristanlatr Date: Mon, 16 Sep 2024 11:17:56 -0400 Subject: [PATCH 37/40] Fix typing --- pydoctor/astbuilder.py | 4 ++-- pydoctor/astutils.py | 24 +++++++++++++----------- 2 files changed, 15 insertions(+), 13 deletions(-) diff --git a/pydoctor/astbuilder.py b/pydoctor/astbuilder.py index 0ca65a769..81262cbd3 100644 --- a/pydoctor/astbuilder.py +++ b/pydoctor/astbuilder.py @@ -751,14 +751,14 @@ def _handleAssignment(self, else: raise IgnoreAssignment() - def _handleDocComment(self, node: ast.Assign | ast.AnnAssign, target: ast.expr): + def _handleDocComment(self, node: ast.Assign | ast.AnnAssign, target: ast.expr) -> None: # Process the doc-comments, this is very similiar to the inline docstrings. 
try: parent, name = self._contextualizeTarget(target) except ValueError: return - # fetch the target of the inline docstring + # fetch the target of the doc-comment if (attr:=parent.contents.get(name)) is None: return diff --git a/pydoctor/astutils.py b/pydoctor/astutils.py index 380465817..17b992ac9 100644 --- a/pydoctor/astutils.py +++ b/pydoctor/astutils.py @@ -836,18 +836,18 @@ def __repr__(self) -> str: self.value.strip()) class TokenProcessor: - def __init__(self, buffers: List[str]) -> None: + def __init__(self, buffers: Sequence[str]) -> None: lines = iter(buffers) self.buffers = buffers self.tokens = generate_tokens(lambda: next(lines)) - self.current = None # type: Token - self.previous = None # type: Token + self.current: Token | None = None + self.previous: Token | None = None def get_line(self, lineno: int) -> str: """Returns specified line.""" return self.buffers[lineno - 1] - def fetch_token(self) -> Token: + def fetch_token(self) -> Token | None: """Fetch a next token from source code. Returns ``None`` if sequence finished. @@ -867,6 +867,7 @@ def fetch_until(self, condition: Any) -> List[Token]: """ tokens = [] while self.fetch_token(): + assert self.current tokens.append(self.current) if self.current == condition: break @@ -887,14 +888,15 @@ class AfterCommentParser(TokenProcessor): and returns the comments for variable if exists. 
""" - def __init__(self, lines: List[str]) -> None: + def __init__(self, lines: Sequence[str]) -> None: super().__init__(lines) - self.comment = None # type: str + self.comment: str | None = None - def fetch_rvalue(self) -> List[Token]: + def fetch_rvalue(self) -> Sequence[Token]: """Fetch right-hand value of assignment.""" - tokens = [] + tokens: list[Token] = [] while self.fetch_token(): + assert self.current tokens.append(self.current) if self.current == [OP, '(']: tokens += self.fetch_until([OP, ')']) @@ -914,7 +916,7 @@ def fetch_rvalue(self) -> List[Token]: def parse(self) -> None: """Parse the code and obtain comment after assignment.""" # skip lvalue (or whole of AnnAssign) - while not self.fetch_token().match([OP, '='], NEWLINE, COMMENT): + while (current:=self.fetch_token()) and not current.match([OP, '='], NEWLINE, COMMENT): assert self.current # skip rvalue (if exists) @@ -922,6 +924,7 @@ def parse(self) -> None: self.fetch_rvalue() if self.current == COMMENT: + assert self.current self.comment = self.current.value comment_re = re.compile('^\\s*#: ?(.*)\r?\n?$') @@ -938,8 +941,7 @@ def extract_doc_comment_after(node: ast.Assign | ast.AnnAssign, lines: Sequence[ """ # check doc comments after assignment current_line = lines[node.lineno - 1] - parser = AfterCommentParser([current_line[node.col_offset:]] + - lines[node.lineno:]) + parser = AfterCommentParser([current_line[node.col_offset:], *lines[node.lineno:]]) parser.parse() if parser.comment and comment_re.match(parser.comment): docstring = comment_re.sub('\\1', parser.comment) From ec682e51598d3a14d8d1cb007709d53e9fd072f5 Mon Sep 17 00:00:00 2001 From: tristanlatr <19967168+tristanlatr@users.noreply.github.com> Date: Mon, 16 Sep 2024 17:42:09 +0200 Subject: [PATCH 38/40] Add missing future import --- pydoctor/test/test_astbuilder.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pydoctor/test/test_astbuilder.py b/pydoctor/test/test_astbuilder.py index 6fd8aae67..ecbf9736e 100644 --- 
a/pydoctor/test/test_astbuilder.py +++ b/pydoctor/test/test_astbuilder.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from typing import Optional, Tuple, Type, List, overload, cast import ast import sys From ef708aa7fb920476d45991a29b81afac2fd7849a Mon Sep 17 00:00:00 2001 From: tristanlatr Date: Sun, 22 Sep 2024 19:43:34 -0400 Subject: [PATCH 39/40] Fix issue #813 --- pydoctor/astbuilder.py | 14 +++++++----- pydoctor/astutils.py | 14 +++++++++++- pydoctor/test/test_astbuilder.py | 38 ++++++++++++++++++++++++++++++++ 3 files changed, 59 insertions(+), 7 deletions(-) diff --git a/pydoctor/astbuilder.py b/pydoctor/astbuilder.py index ec318a6f6..b369c0e3c 100644 --- a/pydoctor/astbuilder.py +++ b/pydoctor/astbuilder.py @@ -18,7 +18,8 @@ from pydoctor.epydoc.markup._pyval_repr import colorize_inline_pyval from pydoctor.astutils import (is_none_literal, is_typing_annotation, is_using_annotations, is_using_typing_final, node2dottedname, node2fullname, is__name__equals__main__, unstring_annotation, upgrade_annotation, iterassign, extract_docstring_linenum, infer_type, get_parents, - get_docstring_node, get_assign_docstring_node, extract_doc_comment_before, extract_doc_comment_after, unparse, NodeVisitor, Parentage, Str) + get_docstring_node, get_assign_docstring_node, extract_doc_comment_before, extract_doc_comment_after, validate_inline_docstring_node, + unparse, NodeVisitor, Parentage, Str) def parseFile(path: Path) -> tuple[ast.Module, Sequence[str]]: """ @@ -857,11 +858,12 @@ def _handleInlineDocstrings(self, assign:Union[ast.Assign, ast.AnnAssign], targe return docstring_node = get_assign_docstring_node(assign) - if docstring_node: - # fetch the target of the inline docstring - attr = parent.contents.get(name) - if attr: - attr.setDocstring(docstring_node) + # Validate the docstring, it's not valid if there is a comment in between... 
+ if docstring_node and validate_inline_docstring_node(assign, + docstring_node, self.builder.lines_collection[self.module] + # fetch the target of the inline docstring + ) and (attr:=parent.contents.get(name)): + attr.setDocstring(docstring_node) def visit_AugAssign(self, node:ast.AugAssign) -> None: try: diff --git a/pydoctor/astutils.py b/pydoctor/astutils.py index 33afaa0f6..0f3b01fc3 100644 --- a/pydoctor/astutils.py +++ b/pydoctor/astutils.py @@ -242,6 +242,9 @@ def get_assign_docstring_node(assign:ast.Assign | ast.AnnAssign) -> Str | None: This helper function relies on the non-standard C{.parent} attribute on AST nodes to navigate upward in the tree and determine this node direct siblings. + + @note: This does not validate whether there is a comment in between the assigment and the + docstring node since the function operates on AST solely. Use L{validate_inline_docstring_node} for that. """ # this call raises an ValueError if we're doing something nasty with the ast... please report parent_node, fieldname = get_node_block(assign) @@ -984,4 +987,13 @@ def extract_doc_comment_before(node: ast.Assign | ast.AnnAssign, lines: Sequence return None -# This was part of the sphinx.pycode.parser module. \ No newline at end of file +# This was part of the sphinx.pycode.parser module. + +def validate_inline_docstring_node(node: ast.Assign | ast.AnnAssign, + docstring: Str, + lines: Sequence[str]) -> bool: + """ + Returns False if the docstring node associated with the given assignment node is not valid. 
+ """ + start, stop = node.lineno, docstring.lineno - 1 + return not any(lines[i].lstrip().startswith('#') for i in range(start, stop)) diff --git a/pydoctor/test/test_astbuilder.py b/pydoctor/test/test_astbuilder.py index ecbf9736e..afe0e1f74 100644 --- a/pydoctor/test/test_astbuilder.py +++ b/pydoctor/test/test_astbuilder.py @@ -3029,3 +3029,41 @@ def test_inline_docstring_at_wrong_place(systemcls: Type[model.System], capsys: assert not mod.contents['c'].docstring assert not mod.contents['d'].docstring assert not mod.contents['e'].docstring + + +@systemcls_param +def test_inline_docstring_is_invalid_when_there_is_a_comment_in_between(systemcls: Type[model.System], capsys: CapSys) -> None: + src = ''' + a = True + # + 'not documentation' + + b = True + # b = False + 'not documentation' + + c = True + + # c = False + + 'not documentation' + + d = True + + # d = False + + """ + not documentation + """ + + e = True + # e = False + """ + not documentation + """ + ''' + + mod = fromText(src, systemcls=systemcls) + assert not capsys.readouterr().out + for o in 'abcde': + assert not mod.contents[o].docstring \ No newline at end of file From ed1fc412fc5d72047cb7995de0d794080fb84f2b Mon Sep 17 00:00:00 2001 From: tristanlatr Date: Sun, 22 Sep 2024 19:54:15 -0400 Subject: [PATCH 40/40] Add a changelog entry --- README.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.rst b/README.rst index 64a44c861..9659a5d32 100644 --- a/README.rst +++ b/README.rst @@ -85,6 +85,8 @@ in development * Replace the deprecated dependency appdirs with platformdirs. * Fix WinError caused by the failure of the symlink creation process. Pydoctor should now run on windows without the need to be administrator. +* Fix a bug with inline docstrings, if there is comment in between an assignment and literal string, + the string will not be misinterpreted as being a docstring. pydoctor 24.3.3 ^^^^^^^^^^^^^^^