From 3a2a6b03cf28d4a004f2cd4b11ce95580f217bd6 Mon Sep 17 00:00:00 2001 From: Miauwkeru Date: Tue, 27 Feb 2024 10:10:16 +0000 Subject: [PATCH] Add stub information on types --- dissect/cstruct/cstruct.py | 21 +++++--- dissect/cstruct/tools/stubify.py | 80 +++++++++++++++++++++++++++--- dissect/cstruct/types/base.py | 19 +++++-- dissect/cstruct/types/enum.py | 14 ++++++ dissect/cstruct/types/packed.py | 5 ++ dissect/cstruct/types/pointer.py | 14 ++++-- dissect/cstruct/types/structure.py | 22 +++++--- pyproject.toml | 3 ++ tests/test_stub.py | 76 ++++++++++++++++++++++++---- 9 files changed, 213 insertions(+), 41 deletions(-) diff --git a/dissect/cstruct/cstruct.py b/dissect/cstruct/cstruct.py index ab7d6ef..a2b8bf4 100644 --- a/dissect/cstruct/cstruct.py +++ b/dissect/cstruct/cstruct.py @@ -5,6 +5,7 @@ import struct import sys import types +from textwrap import indent from typing import Any, BinaryIO, Iterator, Optional from typing import Union as UnionHint @@ -397,19 +398,23 @@ def _make_union( ) -> type[Structure]: return self._make_struct(name, fields, align=align, anonymous=anonymous, base=Union) - def to_stub(self, name: str = ""): - output_data = io.StringIO() + def to_stub(self, name: str = "", packed: bool = True): + buffer = io.StringIO() + indentation = "" + if name: + buffer.write(f"class {name}(cstruct):\n") + indentation = " " * 4 for const, value in self.consts.items(): - output_data.write(f"{const}: {type(value).__name__}=...\n") + buffer.write(indent(f"{const}: {type(value).__name__}=...\n", prefix=indentation)) for name, type_def in self.typedefs.items(): - if not isinstance(type_def, str): - output_data.write(type_def.to_stub(name)) - output_data.write("\n") + if isinstance(type_def, MetaType) and (text := type_def.to_stub(name)): + buffer.write(indent(type_def.to_stub(name), prefix=indentation)) + buffer.write("\n") - output_value = output_data.getvalue() - output_data.close() + output_value = buffer.getvalue() + buffer.close() return output_value diff --git a/dissect/cstruct/tools/stubify.py b/dissect/cstruct/tools/stubify.py index 65b84cc..4883a16 100644 --- a/dissect/cstruct/tools/stubify.py +++ b/dissect/cstruct/tools/stubify.py @@ -1,23 +1,91 @@ # Searches and creates a stub of a cstruct definitions +import importlib +import importlib.util +import io +import logging +import sys from argparse import ArgumentParser -from importlib import import_module +from contextlib import contextmanager from pathlib import Path +from dissect.cstruct import cstruct -def stubify_file(path: Path): - ... +log = logging.getLogger(__name__) + + +def load_module(path: Path, base_path: Path): + module = None + try: + relative_path = path.relative_to(base_path) + module_tuple = (*relative_path.parent.parts, relative_path.stem) + spec = importlib.util.spec_from_file_location(".".join(module_tuple), path) + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + return module + except Exception as e: + log.error("Unable to import %s", path) + log.debug("Error while trying to import module %s", path, exc_info=e) + + +def stubify_file(path: Path, base_path: Path): + buffer = io.StringIO() + + buffer.write("from dissect.cstruct.types import *\n") + prev_offset = buffer.tell() + + tmp_module = load_module(path, base_path) + if tmp_module is None: + return "" + + for name, variable in tmp_module.__dict__.items(): + if name.startswith("__"): + continue + + if isinstance(variable, cstruct): + if variable._module != tmp_module.__name__: + continue + buffer.write(variable.to_stub(name)) + + output = buffer.getvalue() + if buffer.tell() == prev_offset: + output = "" + + buffer.close() + + return output + + +def setup_logger(verbosity: int) -> None: + if verbosity == 0: + log.setLevel(level=logging.WARNING) + elif verbosity == 1: + log.setLevel(level=logging.INFO) + elif verbosity > 1: + log.setLevel(level=logging.DEBUG) def main(): parser = ArgumentParser("stubify") - parser.add_argument("path", type=Path, required=True) + parser.add_argument("path", type=Path) + parser.add_argument("-v", "--verbose", action="count", default=0) args = parser.parse_args() + setup_logger(args.verbose) + file_path: Path = args.path - for file in file_path.glob("*.py"): + iterator = file_path.rglob("*.py") + if file_path.is_file(): + iterator = [file_path] + + for file in iterator: if file.is_file() and ".py" in file.suffixes: - stubify_file(file) + stub = stubify_file(file, file_path) + if not stub: + continue + + with file.with_suffix(".pyi").open("wt") as output_file: + output_file.write(stub) if __name__ == "__main__": diff --git a/dissect/cstruct/types/base.py b/dissect/cstruct/types/base.py index 1f23ee6..e2ce90d 100644 --- a/dissect/cstruct/types/base.py +++ b/dissect/cstruct/types/base.py @@ -177,12 +177,14 @@ def _write_0(cls, stream: BinaryIO, array: list[BaseType]) -> int: """ return cls._write_array(stream, array + [cls()]) - def to_stub(cls, name: str = "") -> str: - output_str = "" - if bases := getattr(cls, "__bases__", None): - output_str = bases[0].__name__ + def _class_stub(cls) -> str: + return f"class {cls.__name__}({cls.__base__.__name__}):\n" - return f"{name}: {output_str}" + def _type_stub(cls, name: str = "") -> str: + return f"{name}: {cls.__name__}" + + def to_stub(cls, name: str) -> str: + return "" class _overload: @@ -240,6 +242,9 @@ def _read(cls, stream: BinaryIO, context: dict[str, Any] = None) -> Array: def default(cls) -> BaseType: return [cls.type.default() for _ in range(0 if cls.dynamic or cls.null_terminated else cls.num_entries)] + def _type_stub(cls, name: str = "") -> str: + return f"{name}: {cls.__base__.__name__}" + class Array(list, BaseType, metaclass=ArrayMetaType): """Implements a fixed or dynamically sized array type. @@ -265,3 +270,7 @@ def _write(cls, stream: BinaryIO, data: list[Any]) -> int: raise ArraySizeError(f"Expected static array size {cls.num_entries}, got {actual_size} instead.") return cls.type._write_array(stream, data) + + @classmethod + def _type_stub(cls, name: str = ""): + return f"{name}: {cls.__base__.__name__}[{cls.type.__name__}]" diff --git a/dissect/cstruct/types/enum.py b/dissect/cstruct/types/enum.py index 1d3b2e6..a70671a 100644 --- a/dissect/cstruct/types/enum.py +++ b/dissect/cstruct/types/enum.py @@ -1,5 +1,6 @@ from __future__ import annotations +import io import sys from enum import EnumMeta, IntEnum, IntFlag from typing import TYPE_CHECKING, Any, BinaryIO, Optional, Union @@ -73,6 +74,19 @@ def _write_0(cls, stream: BinaryIO, array: list[BaseType]) -> int: data = [entry.value if isinstance(entry, Enum) else entry for entry in array] return cls._write_array(stream, data + [cls.type()]) + def _class_stub(cls) -> str: + return f"class {cls.__name__}({cls.__base__.__name__}, {cls.type.__name__}):\n" + + def to_stub(cls, name: str = "") -> str: + output = "" + with io.StringIO() as buf: + buf.write(cls._class_stub()) + for key in cls.__members__.keys(): + buf.write(f" {key} = ...\n") + output = buf.getvalue() + + return output + def _fix_alias_members(cls: type[Enum]): # Emulate aenum NoAlias behaviour diff --git a/dissect/cstruct/types/packed.py b/dissect/cstruct/types/packed.py index ec42c23..f449397 100644 --- a/dissect/cstruct/types/packed.py +++ b/dissect/cstruct/types/packed.py @@ -63,3 +63,8 @@ def _write(cls, stream: BinaryIO, data: Packed) -> int: @classmethod def _write_array(cls, stream: BinaryIO, data: list[Packed]) -> int: return stream.write(_struct(cls.cs.endian, f"{len(data)}{cls.packchar}").pack(*data)) + + @classmethod + def to_stub(cls, name: str): + types = ", ".join([x.__name__ for x in cls.__bases__]) + return f"{name}= type[{types}]" diff --git a/dissect/cstruct/types/pointer.py b/dissect/cstruct/types/pointer.py index f79d86d..8dae106 100644 --- a/dissect/cstruct/types/pointer.py +++ b/dissect/cstruct/types/pointer.py @@ -1,17 +1,19 @@ from __future__ import annotations -from typing import Any, BinaryIO +from typing import Any, BinaryIO, Generic, TypeVar from dissect.cstruct.exceptions import NullPointerDereference from dissect.cstruct.types.base import BaseType, MetaType from dissect.cstruct.types.char import Char from dissect.cstruct.types.void import Void +T = TypeVar("T", bound=MetaType) -class Pointer(int, BaseType): + +class Pointer(int, BaseType, Generic[T]): """Pointer to some other type.""" - type: MetaType + type: T _stream: BinaryIO _context: dict[str, Any] _value: BaseType @@ -73,7 +75,7 @@ def _read(cls, stream: BinaryIO, context: dict[str, Any] = None) -> Pointer: def _write(cls, stream: BinaryIO, data: int) -> int: return cls.cs.pointer._write(stream, data) - def dereference(self) -> Any: + def dereference(self) -> T: if self == 0: raise NullPointerDereference() @@ -93,3 +95,7 @@ def dereference(self) -> Any: self._value = value return self._value + + @classmethod + def _type_stub(cls, name: str = "") -> str: + return f"{name}: {cls.__base__.__name__}[{cls.type.__name__}]" diff --git a/dissect/cstruct/types/structure.py b/dissect/cstruct/types/structure.py index ce0ee99..53bc9ba 100644 --- a/dissect/cstruct/types/structure.py +++ b/dissect/cstruct/types/structure.py @@ -27,6 +27,9 @@ def __repr__(self) -> str: bits_str = f" : {self.bits}" if self.bits else "" return f"" + def type_stub(self): + return self.type._type_stub(self.name) + class StructureMetaType(MetaType): """Base metaclass for cstruct structure type classes.""" @@ -364,15 +367,20 @@ def commit(cls) -> None: def to_stub(cls, name: str = ""): with io.StringIO() as data: - data.write(f"class {cls.__name__}:\n") + data.write(f"class {cls.__name__}({cls.__base__.__name__}):\n") call_args = ["self"] - for field in cls.__fields__: - if not getattr(field.type, "__anonymous__", False): - type_info = f"{field.name}{field.type.to_stub()}" - call_args.append(f"{type_info}=...") - data.write(indent(f"{type_info}\n", prefix=" " * 4)) + for key, field in cls.lookup.items(): + if isinstance(field.type, StructureMetaType): + class_info = field.type.to_stub() + data.write(indent(class_info, prefix=" " * 4)) + call_args.append(f"{field.type_stub()}=...") + + for field in cls.fields.values(): + type_info = field.type_stub() + data.write(indent(f"{type_info}\n", prefix=" " * 4)) + call = ", ".join(call_args) - data.write(indent(f"def __call__({call}): ...", prefix=" " * 4)) + data.write(indent(f"def __init__({call}): ...\n", prefix=" " * 4)) return data.getvalue() diff --git a/pyproject.toml b/pyproject.toml index a258e23..5e2c50b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,6 +31,9 @@ homepage = "https://dissect.tools" documentation = "https://docs.dissect.tools/en/latest/projects/dissect.cstruct" repository = "https://github.com/fox-it/dissect.cstruct" +[project.scripts] +stubify = "dissect.cstruct.tools.stubify:main" + [tool.black] line-length = 120 diff --git a/tests/test_stub.py b/tests/test_stub.py index 015946e..74785fa 100644 --- a/tests/test_stub.py +++ b/tests/test_stub.py @@ -18,10 +18,10 @@ """, "Test", """ - class Test: - a: int - b: int - def __call__(self, a: int=..., b: int=...): ... + class Test(Structure): + a: int32 + b: int32 + def __init__(self, a: int32=..., b: int32=...): ... """, ), ( @@ -32,9 +32,9 @@ def __call__(self, a: int=..., b: int=...): ... """, "Test", """ - class Test: - a: Array - def __call__(self, a: Array=...): ... + class Test(Structure): + a: Array[int32] + def __init__(self, a: Array[int32]=...): ... """, ), ( @@ -50,20 +50,74 @@ def __call__(self, a: Array=...): ... c: str=... """, ), + ( + """ + struct Test { + int *a; + } + """, + "Test", + """ + class Test(Structure): + a: Pointer[int32] + def __init__(self, a: Pointer[int32]=...): ... + """, + ), + ( + """ + enum Test { + A = 1, + B = 2, + C = 2 + }; + """, + "Test", + """ + class Test(Enum, uint32): + A = ... + B = ... + C = ... + """, + ), + ( + """ + flag Test { + A = 0x00001, + B = 0x00002, + C = 0x00004 + }; + """, + "Test", + """ + class Test(Flag, uint32): + A = ... + B = ... + C = ... + """, + ), ( """ struct Test{ union { - int a; - int b; + wchar a[]; + char b[]; } } """, "Test", - """""", + """ + class Test(Structure): + class __anonymous_0__(Union): + a: WcharArray + b: CharArray + def __init__(self, a: WcharArray=..., b: CharArray=...): ... + a: WcharArray + b: CharArray + def __init__(self, __anonymous_0__: __anonymous_0__=...): ... + """, ), ], - ids=["standard structure", "array", "definitions", "unions"], + ids=["standard structure", "array", "definitions", "pointers", "enums", "flags", "unions"], ) def test_to_stub(definition: str, name: str, expected_stub: str): structure = cstruct()