fox-it · Schamper · Aug 4, 2023 · Aug 2, 2023 · Aug 4, 2023
diff --git a/dissect/cstruct/parser.py b/dissect/cstruct/parser.py
@@ -299,11 +299,11 @@ def _remove_comments(string: str) -> str:
         # second group captures comments (//single-line or /* multi-line */)
         regex = re.compile(pattern, re.MULTILINE | re.DOTALL)
 
-        def _replacer(match):
+        def _replacer(match: re.Match) -> str:
             # if the 2nd group (capturing comments) is not None,
             # it means we have captured a non-quoted (real) comment string.
-            if match.group(2) is not None:
-                return ""  # so we will return empty to remove the comment
+            if comment := match.group(2):
+                return "\n" * comment.count("\n")  # keep the comment's newlines to preserve line numbers
             else:  # otherwise, we will return the 1st group
                 return match.group(1)  # captured quoted-string
 
@@ -314,7 +314,7 @@ def _lineno(tok: Token) -> int:
         """Quick and dirty line number calculator"""
 
         match = tok.match
-        return match.string.count("\n", 0, match.start())
+        return match.string.count("\n", 0, match.start()) + 1
 
     def _config_flag(self, tokens: TokenConsumer) -> None:
         flag_token = tokens.consume()

diff --git a/tests/test_parser.py b/tests/test_parser.py
@@ -0,0 +1,29 @@
+from unittest.mock import Mock
+
+from dissect.cstruct.parser import TokenParser
+
+
+def test_preserve_comment_newlines():
+    """Stripping comments must not shift the line numbers reported for the code that follows."""
+    cdef = """
+    // normal comment
+    #define normal_anchor
+    /*
+     * Multi
+     * line
+     * comment
+     */
+    #define multi_anchor
+    """
+    data = TokenParser._remove_comments(cdef)
+
+    # _lineno only reads match.string and match.start(), so a mock token is sufficient
+    mock_token = Mock()
+    mock_token.match.string = data
+    # Line 1 is the empty line after the opening quotes, line 2 is the // comment
+    mock_token.match.start.return_value = data.index("#define normal_anchor")
+    assert TokenParser._lineno(mock_token) == 3
+
+    # The block comment occupies lines 4-8; its newlines must survive comment removal
+    mock_token.match.start.return_value = data.index("#define multi_anchor")
+    assert TokenParser._lineno(mock_token) == 9