diff --git a/docs_store/opcodes/avr_xml_gen/.gitignore b/docs_store/opcodes/avr_xml_gen/.gitignore new file mode 100644 index 00000000..4ab73ef6 --- /dev/null +++ b/docs_store/opcodes/avr_xml_gen/.gitignore @@ -0,0 +1,3 @@ +intermediate_data/ +*.log +__pycache__ diff --git a/docs_store/opcodes/avr_xml_gen/AVR-Instruction-Set-Manual-DS40002198A.pdf b/docs_store/opcodes/avr_xml_gen/AVR-Instruction-Set-Manual-DS40002198A.pdf new file mode 100644 index 00000000..0f2903cf Binary files /dev/null and b/docs_store/opcodes/avr_xml_gen/AVR-Instruction-Set-Manual-DS40002198A.pdf differ diff --git a/docs_store/opcodes/avr_xml_gen/README.md b/docs_store/opcodes/avr_xml_gen/README.md new file mode 100644 index 00000000..e28289d4 --- /dev/null +++ b/docs_store/opcodes/avr_xml_gen/README.md @@ -0,0 +1,160 @@ +This is a program for generating avr instructions xml + +The program uses the avr datasheet, which you can find under [AVR-Instruction-Set-Manual-DS40002198A.pdf](https://ww1.microchip.com/downloads/en/DeviceDoc/AVR-InstructionSet-Manual-DS40002198.pdf) + +The whole program is split into 4 stages: + +1. Data markers +2. Data extraction +3. Data analysis +4. Xml generation + +For gui, [tkinter](https://docs.python.org/3/library/tkinter.html) is used. + +# 1. Data managing and structuring + +Stored in `core/data_processing`. +There is a series of unfortunate names such as `data_processing` or `data_processing.data_management`, +because they don't point to the exact purpose of classes. This can be changed in +the future, but so far, fate has decided. + +## 1.1 Storing instruction and its forms + +Each instruction form has several aspects. They are: + +1. Mnemonic ('name' in xml) +2. Version (version of avr architecture) +3. Description ('summary' in xml) +4. Operands +5. SREG (status registers) +6. Opcode + +I extracted aspects 1, 3, and 4 from tables and 1, 2, 5, and 6 from chapters (datasheet, +chapter 5). 
This is merged, so now 3 and 4 are stored as `table data`, while 5 and +6 are stored as `chapter data`. Sometimes you can see subchapter instead of chapter +because at the very beginning I wanted to name it 'subchapter'. In some cases, division +into the chapter and subchapter makes sense (in the data extraction stage). Even +though 2 (version) is extracted from a chapter, the version is chapter-data independent, +and it was just convenient to extract versions from chapter. + +There is the class `data_management.InstructionForm` for storing `mnemonic`, `version`, +`table_data` and `chapter_data`. It inherits `data_management.StrictDictionary` +which is the same as default dictionary, but with fixed keys. This is helpful, +as it reports wrong keys and guarantees that no key is missing. + +Instruction is stored as a dictionary with `str -> list[InstructionForm]`, where +`str` is the instruction name. + +## 1.2 Context + +`data_management.Context` stores all data that could be used by stages. It stores +instruction data markers, instruction data extracted from datasheet, parsed instruction +data (instruction forms), and even the datasheet file name. + +Context has a dictionary and works in the key-value concept, but with one feature: +keys are data types. So it could store only one copy of particular data. This was +better than a dictionary with str names or something else. During development, it +became clear that this needed to be changed for Aliases. + +Some unique data (e.g. `ExtractedInstructionsData`, `ProcessedInstructionsData`) could +just be an `Alias`, but they would have been the same key of type `Alias` if I had +left it as it was. A future improvement could be to make context distinguish different +aliases as keys. + +## 1.3 Ambiguous data + +In `ambiguous_data.py` you can see several default data types analogs (`dict`, +`list`, `frozenset`) but with the 'Ambiguous' prefix. 
+ +During the data extraction it became clear that it is useful to determine if a list +or set has more than one object. This was needed so often that I decided to create the same +classes but with an `is_ambiguous()` method and special output: it prints 'Ambiguous' +if data is ambiguous. The dictionary is ambiguous if it has ambiguous value or key. +The whole extracted instruction data is `AmbiguousDict[AmbiguousFrozen, AmbiguousList]`. + +## 1.4 Other random classes + +Observer and Subject: +I used the observer pattern for stages, so I just shoved these classes here :) + +DataManager: +Is used by stage (2). It has `request()` method. The implementation depends on a +particular class. The biggest example of this is in `core/stages/data_analysis/instruction_forms_manager.py`. + +# 2. Stages + +All related to stages is stored in `core/stages`. + +## 2.1 The main tasks of the stages + +1. The data markers stage customizes the instruction data markers that determine + what is useful data in the datasheet. It isn't used at this time because data extraction + stage isn't completed. Details are written in '2 Data extraction'. If you want to + see what is created for data markers, replace `app.after(100, stage2.execute)` with + `app.after(100, stage1.execute)`. This is a demonstration of a field selector. It + can catch pdf objects like images, tables and words and access all of their attributes. + +2. The data extraction stage uses data markers created in first stage to extract + instruction data from the datasheet. This stage doesn't have a modular structure + and doesn't use markers, so it should be remade. + +3. Data analysis converts extracted data for direct xml compilation. This data is + a dictionary with instruction names as keys and lists of instruction forms as values. + Datasheet data isn't always unambiguous, which can be resolved in two ways: manually + by selecting needed options or programmatically by adding code to handle specific + cases. 
The first option is preferred. + +4. Xml generation speaks for itself. It uses processed data from the data analysis + stage. + +## 2.2 Stage concept itself + +Basic stage classes are stored in `stage.py` + +Summary: + +The whole program is split into stages. Each stage has a next stage (it could be None), +`execute()`, and `try_complete()` methods. It requires `Context` (1.2) for initialization. + +At the very beginning we `execute()` the first stage. By default, the stage tries +to be completed by `try_complete()` after the `<>` event. If it is completed, +the stage executes the next stage. Otherwise, its work is continued. + +`Stage` also has a `permanently_completed` flag. This is needed for `BidirectionalStage`, +which could go forwards and back. If `Stage` is permanently completed, it can't be +`execute()`d. + +More details: +In addition to the `Stage` class, there is also `StageTask`, `StageGUI` and `BidirectionalStage`. +`StageTask` is used by `try_complete()` to check if completion conditions are satisfied +by method `is_completed()`. It also requires `Context`. + +`StageGUI` is used if we need gui :). It has `enable()` and `disable()` methods. +It requires `DataManager` (1.4). `Stage.execute()` calls `StageGUI.enable()`. The GUI generates +a `<>` event. After generating this event, `Stage` executes `try_complete()`. + +In fact, `StageTask` and `StageGUI` are optional. See `xml_generation.py`. + +`BidirectionalStage` is the same as default stage, except that it also has the previous_stage. +It inherits `Stage`. + +## 2.3 Stages implementation + +1. Data markers has one stage `SettingHeader`. It determines the height of the pdf + header. This is just a demonstration. For gui, `PDFRegionSelector` is used. + +2. Data extraction has one big process of data extraction, no more. As I mentioned + in 2.1, it could be cleaned up. It has gui with a 'waiting' caption. + +3. Data analysis has `InstructionBuilding` class. It is `BidirectionalStage`. 
An + `InstructionBuilding` instance is created for each instruction. It automatically + generates one singular instruction form and completes itself, if instruction data + isn't ambiguous (not `is_ambiguous()`, see `AmbiguousData` in 1.3), otherwise its + GUI is opened. In the GUI, the user creates forms and selects correct form aspects. + A future imrprovement could be adding saving for selected options. In `app.py` you + can see `InstructionBuildingInitializing`. It is needed for creating `InstructionBuilding` + instances. + +4. Xml generation has `XMLGeneration` which uses `InstructionXMLBuilder`. It is a + realy simple stage even without GUI and Task (task should be added to check if all + is good) diff --git a/docs_store/opcodes/avr_xml_gen/app.py b/docs_store/opcodes/avr_xml_gen/app.py new file mode 100644 index 00000000..032e0be6 --- /dev/null +++ b/docs_store/opcodes/avr_xml_gen/app.py @@ -0,0 +1,99 @@ +from core.logging import setup_logging +setup_logging() + +from core.data_processing import Context, InstructionDataMarkers, SourceInfo, ExtractedInstructionsData, ProcessedInstructionsData +from core.stages.data_markers import SettingHeader +from core.stages.data_extraction import DataExtraction +from core.stages.data_analysis import InstructionBuilding +from core.stages.xml_generation import XMLGeneration +from core.stages import Stage, BidirectionalStage, StageGUI, StageTask +from core.data_processing.data_management import DataManager +import tkinter as tk +import os + + +class InstructionBuildingInitializingTask(StageTask): + def is_completed(self) -> bool: + return True + + +class InstructionBuildingInitializingGUI(StageGUI): + def __init__(self, master=None, cnf=..., **kwargs) -> None: + super().__init__(None, master, cnf, **kwargs) + self.__setup_ui() + self.__setup_bindings() + + def __setup_ui(self): + self._label = tk.Label(self, text='Now we need to make up the forms\n' \ + 'Some instructions data is ambiguous\n' \ + 'In that case you need to 
class InstructionBuildingInitializing(Stage):
    """Stage that creates one `InstructionBuilding` stage per extracted instruction.

    The created stages are linked into a chain (each one knows its previous
    and next stage) and the chain is spliced between this stage and the stage
    that originally followed it.
    """

    def __init__(self, stages: set[Stage], context: Context, master=None, cnf=..., **kwargs) -> None:
        """Set up the GUI and task and remember the shared stage registry.

        Args:
            stages: Set owned by the caller; the stages created in `execute()`
                are added to it.
            context: Shared context the extracted instruction data is read from.
            master, cnf, **kwargs: Forwarded to `Stage.__init__`.
        """
        super().__init__(context, master, cnf, **kwargs)
        self.set_gui(InstructionBuildingInitializingGUI())
        self.set_task(InstructionBuildingInitializingTask(self._context))
        self._stages = stages

    def execute(self) -> None:
        """Build the chain of `InstructionBuilding` stages and show the GUI.

        When no instruction data is available the chain is left untouched, so
        completing this stage proceeds directly to the original next stage
        instead of failing on an empty list.
        """
        extracted_data = self._context[ExtractedInstructionsData]
        # The context may hold no extracted data at all; treat that as "no instructions".
        instruction_names = list(extracted_data) if extracted_data is not None else []
        stages: list[BidirectionalStage] = [
            InstructionBuilding(instruction_name, self._context)
            for instruction_name in instruction_names
        ]
        if stages:
            # Link neighbours in both directions.
            for previous_stage, following_stage in zip(stages, stages[1:]):
                previous_stage.set_next(following_stage)
                following_stage.set_previous(previous_stage)
            # Splice the chain in front of the stage that used to follow this one.
            stages[-1].set_next(self._next_stage)
            self._next_stage = stages[0]
            self._stages.update(stages)
        if self._gui is not None:
            self._gui.enable(expand=True)
= set() + stage1 = SettingHeader(context) + stage2 = DataExtraction(context) + stage3 = InstructionBuildingInitializing(stages, context) + stage4 = XMLGeneration(context) + stage5 = EndStage(context) + + stage1.set_next(stage2) + stage2.set_next(stage3) + stage3.set_next(stage4) + stage4.set_next(stage5) + app.after(100, stage2.execute) + app.mainloop() + +main() diff --git a/docs_store/opcodes/avr_xml_gen/core/data_processing/__init__.py b/docs_store/opcodes/avr_xml_gen/core/data_processing/__init__.py new file mode 100644 index 00000000..53ae4be4 --- /dev/null +++ b/docs_store/opcodes/avr_xml_gen/core/data_processing/__init__.py @@ -0,0 +1,2 @@ +from .ambiguous_data import * +from .data_management import * diff --git a/docs_store/opcodes/avr_xml_gen/core/data_processing/ambiguous_data.py b/docs_store/opcodes/avr_xml_gen/core/data_processing/ambiguous_data.py new file mode 100644 index 00000000..30983a09 --- /dev/null +++ b/docs_store/opcodes/avr_xml_gen/core/data_processing/ambiguous_data.py @@ -0,0 +1,92 @@ +from typing import Optional, Collection, Iterable +from colorama import Fore, init +from abc import ABC, abstractmethod + +init(autoreset = True) + + +class AmbiguousData(ABC): + @abstractmethod + def is_ambiguous(self) -> bool: + pass + + +class AmbiguousList[T](list[T], AmbiguousData): + def __init__(self, single_data: Optional[T] = None): + if single_data is None: + super().__init__() + else: + super().__init__([single_data]) + + def append(self, single_data: T) -> None: + if single_data in self: + return + else: + super().append(single_data) + + def extend(self, iterable: Iterable[T]) -> None: + new_data = super().extend(iterable) + super().clear + if not new_data: + return + new_data = set(new_data) + super().extend(new_data) + + def is_ambiguous(self) -> bool: + if not self: + return False + elif any(isinstance(single_data, AmbiguousData) for single_data in self): + return True + return len(self) > 1 + + def __repr__(self) -> str: + if 
self.is_ambiguous(): + return Fore.RED + 'Ambiguous:' + Fore.RESET + f'{super().__repr__()}' + elif not self: + return f'Empty' + else: + return f'{self[0]}' + + +class AmbiguousDict[S, T](dict[S, T], AmbiguousData): + def is_ambiguous(self) -> bool: + if not self: + return False + elif any(isinstance(single_data, AmbiguousData) and single_data.is_ambiguous() for single_data in self.values()): + return True + else: + return False + + def __repr__(self) -> str: + if not self: + return f'Empty' + elif not self.is_ambiguous(): + return f'{super().__repr__()}' + else: + return Fore.RED + 'Ambiguous:' + Fore.RESET + f'{super().__repr__()}' + + +class AmbiguousFrozen[T](frozenset[T], AmbiguousData): + def __new__(cls, data: Collection[T]): + return super().__new__(cls, data) + + def is_ambiguous(self) -> bool: + return len(self) > 1 + + def __repr__(self) -> str: + if self.is_ambiguous(): + return Fore.RED + 'Ambiguous:' + Fore.RESET + f'{sorted(self)}' + else: + return f'{next(iter(self), None)}' + + def __or__(self, other) -> 'AmbiguousFrozen[T]': + return AmbiguousFrozen(super().__or__(other)) + + def __and__(self, other) -> 'AmbiguousFrozen[T]': + return AmbiguousFrozen(super().__and__(other)) + + def __xor__(self, other) -> 'AmbiguousFrozen[T]': + return AmbiguousFrozen(super().__xor__(other)) + + def __sub__(self, other) -> 'AmbiguousFrozen[T]': + return AmbiguousFrozen(super().__sub__(other)) diff --git a/docs_store/opcodes/avr_xml_gen/core/data_processing/data_management.py b/docs_store/opcodes/avr_xml_gen/core/data_processing/data_management.py new file mode 100644 index 00000000..2dc4b5a0 --- /dev/null +++ b/docs_store/opcodes/avr_xml_gen/core/data_processing/data_management.py @@ -0,0 +1,139 @@ +from abc import ABC, abstractmethod +from typing import OrderedDict, Any +import logging +from .ambiguous_data import AmbiguousDict +import weakref + +logger=logging.getLogger(__name__) + +# Dictionary which could have only particular keys +# These keys are placed in 
`valid_keys` +# Class guarantees setting all keys from valid_keys even if they weren't passed in `__init__` +# Also it has unique id. It is used for `core/stages/data_analysis/instruction_building_manager` +class StrictDictionary(dict): + valid_keys = () + + def __init__(self, *args, **kwargs): + for k in kwargs.keys(): + if not self.is_valid_aspect_name(k): + return + super().__init__(*args, **kwargs) + for k in self.valid_keys: + if k not in self.keys(): + self[k] = None + self._id = hash(id(self)) + + def id(self): + return self._id + + def update(self, *args, **kwargs): + for k in kwargs.keys(): + if not self.is_valid_aspect_name(k): + return + super().update(*args, **kwargs) + + def __setitem__(self, k, v): + if not self.is_valid_aspect_name(k): + return + super().__setitem__(k, v) + + def is_valid_aspect_name(self, k: str) -> bool: + if k not in self.valid_keys: + logger.error(f'Try to set unvalid key: {k}') + return False + return True + + + +class SourceInfo(StrictDictionary): + valid_keys = ('pdf_path',) + +class InstructionDataMarkers(StrictDictionary): + valid_keys = ('header_height',) + +class InstructionForm(StrictDictionary): + valid_keys = ('mnemonic', + 'version', + 'table_data', + 'chapter_data', + ) + +class ExtractedInstructionsData(OrderedDict[str, AmbiguousDict]): + pass + +class ProcessedInstructionsData(OrderedDict[str, list[InstructionForm]]): + pass + + +class DataManager(ABC): + # Request is dictionary + # By default 'type':... 
class Subject:
    """Observable half of the observer pattern; observers are held weakly.

    Because only weak references are stored, attaching an observer does not
    keep it alive.  References whose observer has been garbage collected are
    skipped and dropped.
    """

    def __init__(self):
        self._observers_ref: 'list[weakref.ReferenceType[Observer]]' = []

    def _prune_dead(self) -> None:
        """Drop references whose observer no longer exists."""
        self._observers_ref[:] = [wr for wr in self._observers_ref if wr() is not None]

    def notify(self, message=None):
        """Call `update(message)` on every observer that is still alive."""
        self._prune_dead()
        # Iterate over a snapshot: an observer may attach/detach while being notified.
        for observer_ref in list(self._observers_ref):
            observer = observer_ref()
            # The observer may have died since the snapshot was taken.
            if observer is not None:
                observer.update(message)

    def attach(self, observer: 'Observer'):
        """Register `observer` unless an equal observer is already attached."""
        if all(observer != wr() for wr in self._observers_ref):
            self._observers_ref.append(weakref.ref(observer))

    def detach(self, observer: 'Observer'):
        """Unregister `observer`; dead references are removed as well."""
        self._observers_ref[:] = [
            wr for wr in self._observers_ref
            if (target := wr()) is not None and target is not observer
        ]
class Context:
    """Registry that stores one object per type; the type itself is the key.

    `context.record(obj)` stores `obj` under `type(obj)`, and
    `context[SomeType]` returns the object stored under `SomeType`.
    """

    def __init__(self):
        self._data_dict: dict = {}

    def record(self, data):
        """Store `data` under its exact type, replacing any previous object of that type."""
        data_type = type(data)
        if data_type in self._data_dict:
            # Lazy %-formatting: the stored objects are only rendered when
            # debug logging is actually enabled.
            logging.getLogger(__name__).debug(
                'Data overwriting: %s -> %s', self._data_dict[data_type], data
            )
        self._data_dict[data_type] = data

    def __getitem__(self, data_type: type) -> Any:
        """Return the object recorded for `data_type`, or None if there is none.

        A miss is logged at debug level rather than raised.
        """
        try:
            return self._data_dict[data_type]
        except KeyError:
            logging.getLogger(__name__).debug('Data acess failed: %s', data_type)
            return None
class OptionSelector(tk.Frame):
    """Labelled list of options with a "current" line and a "selected" line.

    The current line is the one tracked by the embedded `ScrollListbox`; the
    selected line is the confirmed choice and is shown highlighted.
    """

    def __init__(self, master=None, cnf={}, **kwargs):
        super().__init__(master, cnf, **kwargs)

        self._label = tk.Label(self)
        self._label.pack(side=tk.TOP, fill=tk.X)

        self._scroll_listbox = ScrollListbox(self, **kwargs)
        self._scroll_listbox.pack(side=tk.BOTTOM, fill=tk.BOTH, expand=True)
        self._selected_line_ind = self._scroll_listbox.current_ind

        # NOTE(review): the event sequences below are empty strings in the
        # reviewed text and were probably stripped - confirm the original values.
        self._scroll_listbox.bind('', self.select_current_line, add='+')
        self._scroll_listbox.bind('', self.select_current_line, add='+')
        self._scroll_listbox.bind('', self.__on_focus_out, add='+')

    def set_name(self, name: str):
        """Set the caption shown above the list."""
        self._label.config(text=name)

    def insert(self, index, *elements):
        """Insert `elements` into the list before `index`."""
        self._scroll_listbox.insert(index, *elements)

    def delete(self, first, last = None):
        """Delete the lines from `first` to `last` (or only `first`)."""
        self._scroll_listbox.delete(first, last)

    def get_current(self) -> Any:
        """Return the item under the listbox selection, or None if nothing is selected."""
        current_selection = self._scroll_listbox.curselection()
        if not current_selection:
            # `curselection()` is empty when no line is selected; there is no item to get.
            return None
        return self._scroll_listbox.get(current_selection)

    def get_selected(self) -> Any:
        """Return the confirmed (highlighted) item, or None if there is none."""
        if self._selected_line_ind is None:
            return None
        return self._scroll_listbox.get(self._selected_line_ind)

    def get_current_ind(self) -> int | None:
        """Return the index of the current line of the listbox."""
        return self._scroll_listbox.current_ind

    def get_selected_ind(self) -> int | None:
        """Return the index of the confirmed line."""
        return self._selected_line_ind

    def select_line(self, line_ind: int | None):
        """Make `line_ind` the confirmed line; None clears the confirmation.

        An out-of-range index is logged and leaves the current state untouched.
        """
        # Validate before touching any highlight so that a rejected index
        # cannot leave the old selection recorded but no longer highlighted.
        if isinstance(line_ind, int) and not 0 <= line_ind < self._scroll_listbox.size():
            logger.error('Line ind to select is out of range')
            return
        if self._selected_line_ind is not None:
            self._scroll_listbox.set_line_highlight(self._selected_line_ind, False)
        if isinstance(line_ind, int):
            self._scroll_listbox.set_line_highlight(line_ind, True)
        self._selected_line_ind = line_ind

    def select_current_line(self, event=None):
        """Confirm the current line of the listbox."""
        if self._selected_line_ind is not None:
            self._scroll_listbox.set_line_highlight(self._selected_line_ind, False)
        self._scroll_listbox.set_current_line_highlight(True)
        self._selected_line_ind = self._scroll_listbox.current_ind

    # NOTE(review): this overrides tkinter's own `update()` for this widget - confirm it is intended.
    def update(self):
        """Redraw the highlight so that only the confirmed line is highlighted."""
        self._scroll_listbox.clear_highlight()
        if self._selected_line_ind is not None:
            self._scroll_listbox.set_line_highlight(self._selected_line_ind, True)

    def __on_focus_out(self, event=None):
        # Reset the current line to the confirmed one.
        self._scroll_listbox.current_ind = self._selected_line_ind
+ def __init__(self, master=None, epoint=None, **kwargs): + self._epoint = self if epoint is None else epoint + super().__init__(master, text='next', command=lambda: self._epoint.event_generate('<>'), **kwargs) + + +# The same as `NextButton`, but with additional 'direction': back (`<>` event) +# It might seem weird to get event `<>` for the `NextButton`, +# but `<>` for the `PreviousButton`. But fate decided so... +class BackNextButtons(tk.Frame): + def __init__(self, master=None, epoint=None, **kwargs): + super().__init__(master, **kwargs) + self._epoint = self if epoint is None else epoint + + self.back_button = tk.Button(self, text='back', command=lambda: self._epoint.event_generate('<>')) + self.next_button = tk.Button(self, text='next', command=lambda: self._epoint.event_generate('<>')) + self.back_button.pack(side=tk.LEFT, fill=tk.X, expand=True) + self.next_button.pack(side=tk.RIGHT, fill=tk.X, expand=True) diff --git a/docs_store/opcodes/avr_xml_gen/core/gui/pdf_region_selector.py b/docs_store/opcodes/avr_xml_gen/core/gui/pdf_region_selector.py new file mode 100644 index 00000000..4e8331a0 --- /dev/null +++ b/docs_store/opcodes/avr_xml_gen/core/gui/pdf_region_selector.py @@ -0,0 +1,177 @@ +import tkinter as tk +from tkinter import filedialog +from PIL import ImageTk +import pdfplumber +from typing import Iterable + + +# The name speaks for itself. 
+# Selected items stores in _selected_elements +# Pdf page should be centered, but I forgot to make it +class PDFRegionSelector(tk.Frame): + # There is added my own key: `pdf_selection` + # It determines if pdf choosing button will be created + def __init__(self, master=None, cnf={}, pdf_selection=True, **kwargs): + super().__init__(master, cnf, **kwargs) + + self.__setup_ui(pdf_selection) + self.__setup_bindings(pdf_selection) + + self._pdf = None + self._page = None + elements = {'word', 'line', 'curve'} + self._page_elements = {e: [] for e in elements} + self._selected_elements = {e: [] for e in elements} + + self._current_page_ind = 0 + # pdf page height / image height + self._scale_factor = 1 + self._highlight_color = 'blue' + + + def __setup_ui(self, pdf_selection=True): + self._canvas = tk.Canvas(self) + self._canvas.pack(side=tk.TOP, fill=tk.BOTH, expand=True) + + self._button_frame = tk.Frame(self) + self._button_frame.pack(side=tk.BOTTOM, fill=tk.X) + button_pack = {'side': tk.LEFT, + 'fill': tk.X, + 'expand': True, + } + if pdf_selection: + self._load_button = tk.Button(self._button_frame, text='Load PDF') + self._load_button.pack(button_pack) + self._prev_page_button = tk.Button(self._button_frame, text='Previous Page') + self._prev_page_button.pack(button_pack) + self._next_page_button = tk.Button(self._button_frame, text='Next Page') + self._next_page_button.pack(button_pack) + + + def __setup_bindings(self, pdf_selection=True): + self._canvas.bind("", self.__start_rectangle) + self._canvas.bind("", self.__draw_rectangle) + self._canvas.bind("", self.__finish_rectangle) + self._canvas.bind('', self.show_page) + + if pdf_selection: + self._load_button.config(command=self.load_pdf) + self._prev_page_button.config(command=self.prev_page) + self._next_page_button.config(command=self.next_page) + + + def __start_rectangle(self, event): + self._canvas.delete('selection') + self._rect_start = (event.x, event.y) + self._rect_ind = 
self._canvas.create_rectangle(event.x, event.y, event.x, event.y, fill=self._highlight_color, stipple='gray12') + + + def __draw_rectangle(self, event): + if not self._rect_ind: + return + + self._canvas.coords(self._rect_ind, self._rect_start[0], self._rect_start[1], event.x, event.y) + rect = self._canvas.coords(self._rect_ind) + self._canvas.delete('selection') + self.__select_internal_elements(rect) + + + def __select_internal_elements(self, image_rect: Iterable): + _page_rect = tuple(t * self._scale_factor for t in image_rect) + for e_name in self._page_elements.keys(): + elements = self.__get_internal_elements(_page_rect, e_name) + self._selected_elements[e_name] = elements + + for elements in self._selected_elements.values(): + self.__highlight_elements(elements, self._highlight_color) + + + def __get_internal_elements(self, _page_rect: Iterable, element_name: str) -> list: + if not self._page: + return [] + x0, y0, x1, y1 = _page_rect + internal_elements = [ + e for e in self._page_elements[element_name] + if not (x0 > e['x1'] or + x1 < e['x0'] or + y0 > e['bottom'] or + y1 < e['top']) + ] + return internal_elements + + + def __highlight_elements(self, elements: Iterable, color: str): + for e in elements: + x0 = e['x0'] / self._scale_factor + y0 = e['top'] / self._scale_factor + x1 = e['x1'] / self._scale_factor + y1 = e['bottom'] / self._scale_factor + self._canvas.create_rectangle(x0, y0, x1, y1, fill=color, tags='selection', stipple='gray25') + + + def __finish_rectangle(self, event): + self._canvas.delete(self._rect_ind) + self.event_generate('<>', self.__on_highlight, add='+') + + def __on_highlight(self, event) -> None: + if self._data_manager is None: + raise RuntimeError('error in programm logic: expected non None data manager') + self._data_manager.request({'type': 'set_height', 'elements': self._pdf_region_selector.get_selected_elements()}) + height = self._data_manager.request({'type': 'get_height'}) + self._info_label.configure(text=f'Upper 
bound height: {height}') + + def set_pdf(self, pdf_path: str) -> None: + self._pdf_region_selector.set_pdf(pdf_path) + + +class SettingHeader(BidirectionalStage): + def __init__(self, context: Context, master=None, cnf={}, **kwargs) -> None: + super().__init__(context, master, cnf, **kwargs) + # The same as in `SettingHeaderTask.__init__()` + self._data_manager = DataMarkersManager(self._context[InstructionDataMarkers]) + self.set_gui(SettingHeaderGUI(self._data_manager, master, cnf,**kwargs)) + self.set_task(SettingHeaderTask(self._context)) + + def execute(self) -> None: + super().execute() + # The same as in `SettingHeaderTask __init__()` + pdf_path = self._context[SourceInfo]['pdf_path'] + # I don't know how to fix warnings '_gui has no set_pdf function + # It is raised because _gui is declared as StageGUI type in parent class + self._gui.set_pdf(pdf_path) diff --git a/docs_store/opcodes/avr_xml_gen/core/stages/stage.py b/docs_store/opcodes/avr_xml_gen/core/stages/stage.py new file mode 100644 index 00000000..dababb8f --- /dev/null +++ b/docs_store/opcodes/avr_xml_gen/core/stages/stage.py @@ -0,0 +1,141 @@ +from abc import ABC, abstractmethod +import tkinter as tk +import logging +from core.data_processing import Context, DataManager + + +logger=logging.getLogger(__name__) + + +class StageGUI(ABC, tk.Frame): + def __init__(self, data_manager: DataManager | None=None, master=None, cnf={}, **kwargs) -> None: + super().__init__(master, cnf={}, **kwargs) + self._kwargs: dict = {} + self._cnf: dict = {} + self._data_manager = data_manager + + def enable(self, cnf={}, **kwargs) -> None: + if kwargs or cnf: + self._kwargs = kwargs + self._cnf = cnf + else: + if self._kwargs: + kwargs = self._kwargs + if self._cnf: + cnf = self._cnf + self.pack(cnf, **kwargs) + + def pack(self, cnf={}, **kwargs): + super().pack(cnf, **kwargs) + self._kwargs = kwargs + self._cnf = cnf + + def disable(self) -> None: + self.pack_forget() + + +class StageTask(ABC): + def __init__(self, 
class Stage(ABC):
    """One step in a forward-linked chain of stages.

    A stage holds a shared context, an optional GUI, an optional task and a
    reference to the stage that follows it. Completing a stage hides its
    GUI and executes the next stage that is not ``permanently_completed``.
    """

    def __init__(self, context: Context, master=None, cnf={}, **kwargs) -> None:
        """Store *context* and start with no GUI, task or successor.

        ``master``, ``cnf`` and ``kwargs`` are accepted but not used here.
        """
        self.permanently_completed: bool = False
        self._next_stage: Stage | None = None
        self._task: StageTask | None = None
        self._gui: StageGUI | None = None
        self._context = context

    def set_task(self, task: StageTask) -> None:
        """Attach the task whose completion gates ``try_complete``."""
        self._task = task

    def set_next(self, next_stage: 'Stage | None') -> None:
        """Set (or clear) the stage that follows this one."""
        self._next_stage = next_stage

    def set_gui(self, gui: 'StageGUI | None') -> None:
        """Attach *gui* and bind ``try_complete`` to its event."""
        self._gui = gui
        if gui is None:
            return
        # NOTE(review): the event sequence reads '<>' in this view, which
        # looks truncated; confirm the intended virtual event name.
        gui.bind('<>', self.try_complete, add='+')

    def get_next(self) -> 'Stage | None':
        """Return the stage that follows this one, if any."""
        return self._next_stage

    def execute(self) -> None:
        """Show the GUI, if there is one, filling and expanding in both directions."""
        gui = self._gui
        if gui is None:
            return
        gui.enable(fill=tk.BOTH, expand=True)
        gui.update()

    def try_complete(self, event=None) -> None:
        """Move on to the next stage, provided the task reports completion.

        Logs and returns without moving on when no task is set or the task
        is not completed yet.
        """
        task = self._task
        if task is None:
            logger.error('Task isn\'t setted')
            return
        if task.is_completed():
            self._complete(event)
            return
        logger.info(f'Can\'t complete stage({self}): task({self._task}) isn\'t completed')

    def _complete(self, event=None) -> None:
        """Hide this stage's GUI and execute the next runnable stage.

        Stages flagged ``permanently_completed`` are unlinked from the chain
        on the way. An error is logged when no stage is left.
        """
        # Skip over finished stages by re-pointing the link past them.
        while self._next_stage is not None and self._next_stage.permanently_completed:
            self.set_next(self._next_stage.get_next())

        if self._next_stage is None:
            logger.error('Can\'t execute the next stage due its absence')
            return
        if self._gui is not None:
            self._gui.disable()
        self._next_stage.execute()
def _go_back(self, event=None) -> None:
    """Hide this stage's GUI and execute the closest runnable previous stage.

    Previous stages flagged ``permanently_completed`` are skipped by
    re-pointing ``self._previous_stage`` further back. Skipping stops with a
    logged error when such a stage is not a ``BidirectionalStage``, or when
    the chain runs out. ``event`` is accepted but not used.
    """
    previous = self._previous_stage
    while previous is not None and previous.permanently_completed:
        if not isinstance(previous, BidirectionalStage):
            # A plain Stage offers no get_previous(), so the walk ends here.
            logger.error('Previous stage is unreachable')
            return
        self.set_previous(previous.get_previous())
        previous = self._previous_stage

    if previous is None:
        logger.error('Can\'t execute the previous stage due its absence')
        return
    if self._gui is not None:
        self._gui.disable()
    previous.execute()
@classmethod
def create_instruction_xml(cls, instruction_name: str, instruction_forms: list[InstructionForm]) -> ET.Element:
    """Build the ``<Instruction>`` element for one instruction.

    Every form is converted with ``cls.create_instruction_form_xml`` and its
    ``<InstructionForm>`` child is filed under the ``<Version>`` element
    carrying the same ``value``. Versions are ordered by ``value`` and the
    forms inside each version by ``mnemonic``, both compared as strings.

    Args:
        instruction_name: Stored in the ``name`` attribute of the root.
        instruction_forms: Forms belonging to this instruction.

    Returns:
        The populated ``<Instruction>`` element.

    Raises:
        RuntimeError: If a converted form has no ``value`` attribute or no
            ``<InstructionForm>`` child.
    """
    root = ET.Element('Instruction')
    root.set('name', instruction_name)

    # Index of the <Version> elements created so far, keyed by their value,
    # so each form is filed without rescanning the children of ``root``.
    versions: dict[str, ET.Element] = {}
    for form in instruction_forms:
        form_xml = cls.create_instruction_form_xml(form)
        version_value = form_xml.get('value')
        if version_value is None:
            raise RuntimeError(f'form aspect: {form} instruction form version is None')
        form_content = form_xml.find('InstructionForm')
        if form_content is None:
            raise RuntimeError(f'form aspect: {form} instruction form content is None')

        version_et = versions.get(version_value)
        if version_et is None:
            version_et = ET.SubElement(root, 'Version')
            version_et.set('value', version_value)
            versions[version_value] = version_et
        version_et.append(form_content)

    # Slice assignment swaps all children at once; sorted() is stable, so
    # forms with equal mnemonics keep their insertion order.
    for version_et in root:
        version_et[:] = sorted(version_et, key=lambda form_et: str(form_et.get('mnemonic')))
    root[:] = sorted(root, key=lambda version_et: str(version_et.get('value')))

    return root
@classmethod
def are_instruction_form_aspects_valid(cls, form_aspects: dict) -> bool:
    """Tell whether *form_aspects* has every key in ``InstructionForm.valid_keys``.

    Only the presence of the keys is checked; the values stored under them
    are not inspected.

    Args:
        form_aspects: Mapping to check.

    Returns:
        ``True`` when no key is missing, ``False`` otherwise.
    """
    return all(
        aspect_name in form_aspects.keys()
        for aspect_name in InstructionForm.valid_keys
    )
def _build_xml(self):
    """Write all processed instructions to ``avr_instructions.xml``.

    Builds an ``<InstructionSet name="AVR">`` element with one child per
    entry of ``self._context[ProcessedInstructionsData]``, writes it to
    disk, then replaces the file with BeautifulSoup's prettified rendering
    of the same document.
    """
    output_path = 'avr_instructions.xml'
    instructions_data = self._context[ProcessedInstructionsData]

    root = ET.Element('InstructionSet')
    root.set('name', 'AVR')
    for instruction_name, instruction_forms in instructions_data.items():
        root.append(InstructionXMLBuilder.create_instruction_xml(instruction_name, instruction_forms))

    # The raw document is written first, so a file exists even if the
    # prettifying step below fails.
    ET.ElementTree(root).write(output_path, encoding='utf-8', xml_declaration=True)

    # Re-read with the encoding the file was written in, and make sure the
    # handle is closed instead of being left to the garbage collector.
    with open(output_path, encoding='utf-8') as raw_file:
        pretty_xml = BeautifulSoup(raw_file, 'xml').prettify()

    # Write UTF-8 explicitly rather than relying on the platform default.
    with open(output_path, 'w', encoding='utf-8') as pretty_file:
        pretty_file.write(pretty_xml)
def test_request_delete():
    """Deleting form 0 removes one form at a time and is harmless once none are left."""
    forms = manager._forms
    forms.clear()
    forms.extend(parser.create_empty_form() for _ in range(2))

    # Two forms exist: the first two deletions shrink the list, the third
    # finds nothing to delete and must leave it empty.
    for expected_len in (1, 0, 0):
        manager.request({'type': 'delete', 'target': 'form', 'form_ind': 0})
        assert len(forms) == expected_len
def test_request_get():
    """A ``get`` request for form 0 returns a form equal to the stored one."""
    stored_forms = manager._forms
    # Make sure there is a form at index 0 to ask for.
    if len(stored_forms) == 0:
        stored_forms.append(parser.create_empty_form())

    fetched = manager.request({'type': 'get', 'target': 'form', 'form_ind': 0})
    assert fetched == manager._forms[0]