diff --git a/dissect/cstruct/parser.py b/dissect/cstruct/parser.py
index 64b2447..0394a00 100644
--- a/dissect/cstruct/parser.py
+++ b/dissect/cstruct/parser.py
@@ -299,11 +299,11 @@ def _remove_comments(string: str) -> str:
         # second group captures comments (//single-line or /* multi-line */)
         regex = re.compile(pattern, re.MULTILINE | re.DOTALL)
 
-        def _replacer(match):
+        def _replacer(match: re.Match) -> str:
             # if the 2nd group (capturing comments) is not None,
             # it means we have captured a non-quoted (real) comment string.
-            if match.group(2) is not None:
-                return ""  # so we will return empty to remove the comment
+            if comment := match.group(2):
+                return "\n" * comment.count("\n")  # drop the comment but keep its newlines so line numbers stay valid
             else:  # otherwise, we will return the 1st group
                 return match.group(1)  # captured quoted-string
 
@@ -314,7 +314,7 @@ def _lineno(tok: Token) -> int:
         """Quick and dirty line number calculator"""
 
         match = tok.match
-        return match.string.count("\n", 0, match.start())
+        return match.string.count("\n", 0, match.start()) + 1
 
     def _config_flag(self, tokens: TokenConsumer) -> None:
         flag_token = tokens.consume()
diff --git a/tests/test_parser.py b/tests/test_parser.py
new file mode 100644
index 0000000..bdc408b
--- /dev/null
+++ b/tests/test_parser.py
@@ -0,0 +1,26 @@
+from unittest.mock import Mock
+
+from dissect.cstruct.parser import TokenParser
+
+
+def test_preserve_comment_newlines():
+    cdef = """
+    // normal comment
+    #define normal_anchor
+    /*
+     * Multi
+     * line
+     * comment
+     */
+    #define multi_anchor
+    """
+    data = TokenParser._remove_comments(cdef)
+    print(repr(data))
+
+    mock_token = Mock()
+    mock_token.match.string = data
+    mock_token.match.start.return_value = data.index("#define normal_anchor")
+    assert TokenParser._lineno(mock_token) == 3
+
+    mock_token.match.start.return_value = data.index("#define multi_anchor")
+    assert TokenParser._lineno(mock_token) == 9