From b9c4264f6d07eb08f76b4ba11fc84683177563cf Mon Sep 17 00:00:00 2001 From: Daniel Morais Date: Thu, 6 Jul 2023 18:22:20 +0200 Subject: [PATCH] Replace tesseract by fake OCR --- speculos/main.py | 3 +- speculos/mcu/bagl_font.py | 93 ++--------- speculos/mcu/ocr.py | 325 +++++++++++++++++++++++-------------- speculos/mcu/seproxyhal.py | 9 +- 4 files changed, 223 insertions(+), 207 deletions(-) diff --git a/speculos/main.py b/speculos/main.py index b294f054..72a9eac5 100644 --- a/speculos/main.py +++ b/speculos/main.py @@ -457,7 +457,8 @@ def main(prog=None): automation_server=automation_server, transport=args.usb, fonts_path=pkg_resources.resource_filename(__name__, "/fonts"), - api_level=args.apiLevel) + api_level=args.apiLevel, + model=args.model) button = None if args.button_port: diff --git a/speculos/mcu/bagl_font.py b/speculos/mcu/bagl_font.py index 2fbfb81a..1eb1ee95 100644 --- a/speculos/mcu/bagl_font.py +++ b/speculos/mcu/bagl_font.py @@ -43,8 +43,7 @@ } bagl_font_character_t; ''' -Font = namedtuple( - "Font", "font_id font_name bpp char_height baseline_height char_kerning first_char last_char characters bitmap") +Font = namedtuple("Font", "font_id bpp char_height baseline_height char_kerning first_char last_char characters bitmap") FontCharacter = namedtuple("FontCharacter", "char_width bitmap_byte_count bitmap_offset") bitmapLUCIDA_CONSOLE_6PT_8H = [ @@ -3061,89 +3060,29 @@ ] FONTS = [ - Font( - BAGL_FONT_OPEN_SANS_REGULAR_11px, - "bagl_font_open_sans_regular_11px", - 1, 12, 9, 0, - 0x0020, 0x007F, - charactersOPEN_SANS_REGULAR_11PX, - bitmapOPEN_SANS_REGULAR_11PX), - Font( - BAGL_FONT_OPEN_SANS_EXTRABOLD_11px, - "bagl_font_open_sans_extrabold_11px", - 1, 12, 9, 0, 0x0020, 0x007F, - charactersOPEN_SANS_EXTRABOLD_11PX, - bitmapOPEN_SANS_EXTRABOLD_11PX), - Font( - BAGL_FONT_OPEN_SANS_LIGHT_16px, - "bagl_font_open_sans_light_16px", - 1, 18, 13, 0, - 0x0020, 0x007F, - charactersOPEN_SANS_LIGHT_16PX, - bitmapOPEN_SANS_LIGHT_16PX), - Font( - BAGL_FONT_LUCIDA_CONSOLE_8PX, - None, - 1, 8, 16, 0, - 0x0020, 0x00ff, - charactersLUCIDA_CONSOLE_6PT_8H, + Font(BAGL_FONT_LUCIDA_CONSOLE_8PX, 1, 8, 16, 0, 0x0020, 0x00ff, charactersLUCIDA_CONSOLE_6PT_8H, bitmapLUCIDA_CONSOLE_6PT_8H), - Font( - BAGL_FONT_OPEN_SANS_LIGHT_16_22PX, - None, - 4, 22, 16, 0, - 0x0020, 0x007f, - charactersOPEN_SANS_LIGHT_16_22PX, + Font(BAGL_FONT_OPEN_SANS_LIGHT_16_22PX, 4, 22, 16, 0, 0x0020, 0x007f, charactersOPEN_SANS_LIGHT_16_22PX, bitmapOPEN_SANS_LIGHT_16_22PX), - Font( - BAGL_FONT_OPEN_SANS_REGULAR_8_11PX, - None, - 4, 11, 8, 0, - 0x0020, 0x007f, - charactersOPEN_SANS_REGULAR_8_11PX, + Font(BAGL_FONT_OPEN_SANS_REGULAR_8_11PX, 4, 11, 8, 0, 0x0020, 0x007f, charactersOPEN_SANS_REGULAR_8_11PX, bitmapOPEN_SANS_REGULAR_8_11PX), - Font( - BAGL_FONT_OPEN_SANS_REGULAR_10_13PX, - None, - 4, 14, 10, 0, - 0x0020, 0x007F, - charactersOPEN_SANS_REGULAR_10_13PX, + Font(BAGL_FONT_OPEN_SANS_REGULAR_10_13PX, 4, 14, 10, 0, 0x0020, 0x007F, charactersOPEN_SANS_REGULAR_10_13PX, bitmapOPEN_SANS_REGULAR_10_13PX), - Font( - BAGL_FONT_OPEN_SANS_SEMIBOLD_10_13PX, - None, - 4, 14, 10, 0, - 0x0020, 0x007f, - charactersOPEN_SANS_SEMIBOLD_10_13PX, + Font(BAGL_FONT_OPEN_SANS_EXTRABOLD_11px, 1, 12, 9, 0, 0x0020, 0x007F, charactersOPEN_SANS_EXTRABOLD_11PX, + bitmapOPEN_SANS_EXTRABOLD_11PX), + Font(BAGL_FONT_OPEN_SANS_REGULAR_11px, 1, 12, 9, 0, 0x0020, 0x007F, charactersOPEN_SANS_REGULAR_11PX, + bitmapOPEN_SANS_REGULAR_11PX), + Font(BAGL_FONT_OPEN_SANS_LIGHT_16px, 1, 18, 13, 0, 0x0020, 0x007F, charactersOPEN_SANS_LIGHT_16PX, + bitmapOPEN_SANS_LIGHT_16PX), + Font(BAGL_FONT_OPEN_SANS_SEMIBOLD_10_13PX, 4, 14, 10, 0, 0x0020, 0x007f, charactersOPEN_SANS_SEMIBOLD_10_13PX, bitmapOPEN_SANS_SEMIBOLD_10_13PX), - Font( - BAGL_FONT_OPEN_SANS_SEMIBOLD_8_11PX, - None, - 4, 11, 8, 0, - 0x0020, 0x007f, - charactersOPEN_SANS_SEMIBOLD_8_11PX, + Font(BAGL_FONT_OPEN_SANS_SEMIBOLD_8_11PX, 4, 11, 8, 0, 0x0020, 0x007f, charactersOPEN_SANS_SEMIBOLD_8_11PX, bitmapOPEN_SANS_SEMIBOLD_8_11PX), - Font( - BAGL_FONT_OPEN_SANS_REGULAR_11_14PX, - None, - 4, 16, 12, 0, - 0x0020, 0x007f, - charactersOPEN_SANS_REGULAR_11_14PX, + Font(BAGL_FONT_OPEN_SANS_REGULAR_11_14PX, 4, 16, 12, 0, 0x0020, 0x007f, charactersOPEN_SANS_REGULAR_11_14PX, bitmapOPEN_SANS_REGULAR_11_14PX), - Font( - BAGL_FONT_OPEN_SANS_SEMIBOLD_11_16PX, - None, - 4, 16, 12, 0, - 0x0020, 0x007f, - charactersOPEN_SANS_SEMIBOLD_11_16PX, + Font(BAGL_FONT_OPEN_SANS_SEMIBOLD_11_16PX, 4, 16, 12, 0, 0x0020, 0x007f, charactersOPEN_SANS_SEMIBOLD_11_16PX, bitmapOPEN_SANS_SEMIBOLD_11_16PX), - Font( - BAGL_FONT_SYMBOLS_0, - None, - 4, 16, 16, 0, - 0x0000, 0x0006, - charactersSYMBOLS_0, - bitmapSYMBOLS_0) + Font(BAGL_FONT_SYMBOLS_0, 4, 16, 16, 0, 0x0000, 0x0006, charactersSYMBOLS_0, bitmapSYMBOLS_0) ] diff --git a/speculos/mcu/ocr.py b/speculos/mcu/ocr.py index 3c46a709..f1048ef0 100644 --- a/speculos/mcu/ocr.py +++ b/speculos/mcu/ocr.py @@ -5,6 +5,16 @@ import json import os import string +from construct import Struct, Int8ul, Int16ul + +nbgl_area_t = Struct( + "x0" / Int16ul, + "y0" / Int16ul, + "width" / Int16ul, + "height" / Int16ul, + "color" / Int8ul, + "bpp" / Int8ul, +) from .automation import TextEvent from . import bagl_font @@ -83,41 +93,6 @@ def display_char(font: bagl_font.Font, char: str) -> None: print("\n".join(split_bytes(char.bitmap, font.bpp * char.char.char_width))) -def get_json_font(json_name: str) -> Mapping[Char, BitMapChar]: - # If no json filename was provided, just return - if json_name is None: - return None - - # Add the fonts path (JSON files are in speculos/fonts) - json_name = os.path.join(OCR.fonts_path, json_name) - # Add api level information and file extension - json_name += f"-api-level-{OCR.api_level}.json" - - # Read the JSON file if we found one - font_info = [] - if os.path.exists(json_name): - with open(json_name, "r") as json_file: - font_info = json.load(json_file, strict=False) - font_info = font_info[0] - # Deserialize bitmap - bitmap = base64.b64decode(font_info['bitmap']) - # Build BitMapChar - font_map = {} - for character in font_info['bagl_font_character']: - char = character['char'] - offset = character['bitmap_offset'] - count = character['bitmap_byte_count'] - # Add this entry in font_map - font_map[chr(char)] = BitMapChar( - char, - bytes(bitmap[offset:(offset + count)]), - ) - OCR.json_fonts.append(font_map) - return font_map - - return None - - def get_font_map(font: bagl_font.Font): if font.font_id not in __FONT_MAP: __FONT_MAP[font.font_id] = _get_font_map(font) @@ -125,11 +100,6 @@ def get_font_map(font: bagl_font.Font): def _get_font_map(font: bagl_font.Font) -> Mapping[Char, BitMapChar]: - # Do we have a JSON file containing all the information we want? - json_font = get_json_font(font.font_name) - if json_font is not None: - return json_font - font_map = {} for ord_char, font_char in zip( range(font.first_char, font.last_char), font.characters @@ -148,63 +118,190 @@ def _get_font_map(font: bagl_font.Font) -> Mapping[Char, BitMapChar]: return font_map -def find_char_from_bitmap(bitmap: BitMap): - """ - Find a character from a bitmap - >>> font = get_font(4) - >>> char = get_char(font, 'c') - >>> find_char_from_bitmap(char.bitmap) - 'c' - """ - all_values = [] +class OCR: + + # Maximum space for a letter to be considered part of the same word + MAX_BLANK_SPACE = 12 + # Current API_LEVEL + api_level = 0 + # Current device model (nanox, nanosp, stax etc) + model = "" + # Font names for LNX & LNS+ (most used ones first) + font_names_nanox_nanosp = ["bagl_font_open_sans_regular_11px", + "bagl_font_open_sans_extrabold_11px", + "bagl_font_open_sans_light_16px"] + # Font names for stax (most used ones first) + font_names_stax = ["nbgl_font_inter_regular_24", + "nbgl_font_inter_semibold_24", + "nbgl_font_inter_medium_32", + "nbgl_font_inter_regular_24_1bpp", + "nbgl_font_inter_semibold_24_1bpp", + "nbgl_font_inter_medium_32_1bpp", + "nbgl_font_hmalpha_mono_medium_32"] + # To keep track of loaded JSON fonts + json_fonts = [] + # By default use legacy OCR (built-in font bitmaps) legacy = True - for font in bagl_font.FONTS: - font_map = get_font_map(font) - if font_map in OCR.json_fonts: - # This is a loaded JSON font => new behaviour - for character_value, bitmap_struct in font_map.items(): - if bitmap_struct.bitmap == bitmap: - all_values.append(character_value) - legacy = False - else: - # Not a loaded JSON font => legacy behaviour + + def __init__(self, fonts_path=None, api_level=None, model=None): + self.events: List[TextEvent] = [] + # Save model & the API_LEVEL in a class variable + if api_level is not None: + OCR.api_level = int(api_level) + if model is not None: + OCR.model = str(model) + # With API_LEVEL 5 and >= 11, we can read fonts in JSON files + if OCR.api_level == 5 or OCR.api_level >= 11: + # Build font_names list using api_level & model + if OCR.model == "stax": + names = OCR.font_names_stax + struct_name = 'nbgl_font_character' + OCR.MAX_BLANK_SPACE = 24 # Characters are bigger on Stax + else: + names = OCR.font_names_nanox_nanosp + struct_name = 'bagl_font_character' + + for name in names: + # Add the fonts path (JSON files are in speculos/fonts) + json_name = os.path.join(fonts_path, name) + # Add api level information and file extension + json_name += f"-api-level-{OCR.api_level}.json" + if os.path.exists(json_name): + self.get_json_font(json_name, struct_name) + + if len(OCR.json_fonts) != 0: + OCR.legacy = False + else: + print("WARNING: didn't find any JSON font files => OCR will "\ + "not work properly!\n") + + @staticmethod + def get_json_font(name, struct_name) -> Mapping[Char, BitMapChar]: + """ + Read the JSON file and parse all character information + """ + with open(name, "r") as json_file: + font_info = json.load(json_file, strict=False) + font_info = font_info[0] + # Deserialize bitmap + bitmap = base64.b64decode(font_info['bitmap']) + if not struct_name in font_info: + return + # Build BitMapChar + font_map = {} + for character in font_info[struct_name]: + char = character['char'] + offset = character['bitmap_offset'] + count = character['bitmap_byte_count'] + # Add this entry in font_map + font_map[chr(char)] = BitMapChar( + char, + bytes(bitmap[offset:(offset + count)]), + ) + OCR.json_fonts.append(font_map) + + @staticmethod + def find_char_from_bitmap_legacy(bitmap: BitMap): + """ + Find a character from a bitmap + >>> font = get_font(4) + >>> char = get_char(font, 'c') + >>> find_char_from_bitmap(char.bitmap) + 'c' + """ + all_values = [] + for font in bagl_font.FONTS: + font_map = get_font_map(font) for character_value, bitmap_struct in font_map.items(): if bitmap_struct.bitmap.startswith(bitmap): - # sometimes (but not always) the bitmap being passed is - # shortened by one '\0' byte, not matching the exact bitmap + # sometimes (but not always) the bitmap being passed is shortened + # by one '\x00' byte, not matching the exact bitmap # provided in the font. Hence the 'residual' computation residual_bytes: bytes = bitmap_struct.bitmap[len(bitmap):] if all(b == 0 for b in residual_bytes): all_values.append(character_value) + if all_values: + char = max([x for x in all_values]) + if char == "\x80": + char = " " + return char + return None + + def analyze_bitmap_legacy(self, x, y, w, h, bitmap): + char = self.find_char_from_bitmap_legacy(bitmap) + if char: + if self.events and y <= self.events[-1].y: + self.events[-1].text += char + else: + # create a new TextEvent if there are no events yet + # or if there is a new line + self.events.append(TextEvent(char, x, y, w, h)) + + @staticmethod + def find_char_from_bitmap(bitmap: BitMap): + """ + Parse loaded JSON fonts and compare font bitmaps with the one provided + """ + all_values = [] + for font_map in OCR.json_fonts: + for character_value, bitmap_struct in font_map.items(): + if bitmap_struct.bitmap == bitmap: + all_values.append(character_value) if all_values: char = max([x for x in all_values]) if char == "\x80": char = " " - return legacy, char + return char - return legacy, None - - -class OCR: - - # Maximum space for a letter to be considered part of the same word - MAX_BLANK_SPACE = 12 - # Location of JSON fonts - fonts_path = "" - # To keep track of loaded JSON fonts - json_fonts = [] - # Current API_LEVEL - api_level = 0 + return None - def __init__(self, fonts_path=None, api_level=None): - self.events: List[TextEvent] = [] - # Save fonts path & the API_LEVEL in a class variable - if fonts_path is not None: - OCR.fonts_path = fonts_path - if api_level is not None: - OCR.api_level = int(api_level) + def store_char_in_last_event(self, x, y, w, h, char): + """ + Add current character to last event + """ + self.events[-1].text += char + # Update w for all chars in self.events[-1] + x2 = x + w - 1 + self.events[-1].w = x2 - self.events[-1].x + 1 + # Update y & h, if needed, for all chars in self.events[-1] + y1 = y + if y1 > self.events[-1].y: + # Keep the lowest Y in Y1 + y1 = self.events[-1].y + y2 = y + h - 1 + if y2 < (self.events[-1].y + self.events[-1].h): + # Keep the highest Y in Y2 + y2 = self.events[-1].y + self.events[-1].h - 1 + self.events[-1].y = y1 + self.events[-1].h = y2 - y1 + 1 + + def analyze_bitmap(self, x, y, w, h, bitmap): + """ + Check if provided bitmap is identical to a char in loaded fonts. + """ + # Space is now encoded as an empty character (no 'space' wasted :) + if len(bitmap) == 0: + char = ' ' + else: + char = self.find_char_from_bitmap(bitmap) + # a char was 'recognised': is it on the same line than previous one? + if char: + # Compute difference with X coord from previous event + # if x_diff > MAX_BLANK_SPACE the char is not on same sentence + if self.events: + x_diff = x - (self.events[-1].x + self.events[-1].w) + if x_diff < 0: + x_diff = -x_diff + # Try to find if that char can be added to previous event + if self.events and y < (self.events[-1].y + self.events[-1].h) \ + and x_diff < OCR.MAX_BLANK_SPACE: + # Add this character to previous event + self.store_char_in_last_event(x, y, w, h, char) + else: + # create a new TextEvent if there are no events yet or if there is a new line + self.events.append(TextEvent(char, x, y, w, h)) - def analyze_bitmap(self, data: bytes): + def analyze_bitmap_bagl(self, data: bytes): if data[0] != 0: return @@ -221,49 +318,27 @@ def analyze_bitmap(self, data: bytes): if (len(bitmap) * 8) % w: h += 1 - # Space is now encoded as an empty character (no 'space' wasted :) - if len(bitmap) == 0: - char = ' ' - legacy = False + if OCR.legacy: + self.analyze_bitmap_legacy(x, y, w, h, bitmap) else: - legacy, char = find_char_from_bitmap(bitmap) - if char: - if legacy: - # char is not from a loaded JSON font => keep legacy behaviour - if self.events and y <= self.events[-1].y: - self.events[-1].text += char - else: - # create a new TextEvent if there are no events yet - # or if there is a new line - self.events.append(TextEvent(char, x, y, w, h)) - return - # char was found in a loaded JSON font => new behaviour - if self.events: - x_diff = x - (self.events[-1].x + self.events[-1].w) - if x_diff < 0: - x_diff = -x_diff - # if x_diff > MAX_BLANK_SPACE the char is not on same word - if self.events and y < (self.events[-1].y + self.events[-1].h) \ - and x_diff < OCR.MAX_BLANK_SPACE: - # Add this character to previous event - self.events[-1].text += char - # Update w for all chars in self.events[-1] - x2 = x + w - 1 - self.events[-1].w = x2 - self.events[-1].x + 1 - # Update y & h, if needed, for all chars in self.events[-1] - y1 = y - if y1 > self.events[-1].y: - # Keep the lowest Y in Y1 - y1 = self.events[-1].y - y2 = y + h - 1 - if y2 < (self.events[-1].y + self.events[-1].h): - # Keep the highest Y in Y2 - y2 = self.events[-1].y + self.events[-1].h - 1 - self.events[-1].y = y1 - self.events[-1].h = y2 - y1 + 1 - else: - # create a new TextEvent if there are no events yet or if there is a new line - self.events.append(TextEvent(char, x, y, w, h)) + self.analyze_bitmap(x, y, w, h, bitmap) + + def analyze_bitmap_nbgl(self, data: bytes): + """ + data contains (check sys_nbgl_front_draw_img_rle in src/bolos/nbgl.c) + - area (sizeof(nbgl_area_t)) + - compressed bitmap (buffer_len) + - foreground_color (1 byte) + - nb_skipped_bytes (1 byte) + """ + area = nbgl_area_t.parse(data[0:nbgl_area_t.sizeof()]) + bitmap = data[nbgl_area_t.sizeof():-2] + if OCR.legacy: + self.analyze_bitmap_legacy( + area.x0, area.y0, area.width, area.height, bitmap) + else: + self.analyze_bitmap( + area.x0, area.y0, area.width, area.height, bitmap) def get_events(self) -> List[TextEvent]: events = self.events.copy() diff --git a/speculos/mcu/seproxyhal.py b/speculos/mcu/seproxyhal.py index 1f7ed945..5e97faa1 100644 --- a/speculos/mcu/seproxyhal.py +++ b/speculos/mcu/seproxyhal.py @@ -244,7 +244,8 @@ def __init__(self, automation_server: Optional[AutomationServer] = None, transport: str = 'hid', fonts_path: str = None, - api_level=None): + api_level=None, + model=None): self.s = s self.logger = logging.getLogger("seproxyhal") self.printf_queue = '' @@ -263,7 +264,7 @@ def __init__(self, self.usb = usb.USB(self.socket_helper.queue_packet, transport=transport) - self.ocr = OCR(fonts_path, api_level) + self.ocr = OCR(fonts_path, api_level, model) # A list of callback methods when an APDU response is received self.apdu_callbacks: List[Callable] = [] @@ -359,7 +360,7 @@ def can_read(self, s: socket.socket, screen): self.logger.debug("SephTag.SCREEN_DISPLAY_RAW_STATUS") screen.display_raw_status(data) if screen.model in ["nanox", "nanosp"]: - self.ocr.analyze_bitmap(data) + self.ocr.analyze_bitmap_bagl(data) if tag != SephTag.BAGL_DRAW_BITMAP: self.socket_helper.send_packet(SephTag.DISPLAY_PROCESSED_EVENT) if screen.rendering == RENDER_METHOD.PROGRESSIVE: @@ -428,7 +429,7 @@ def can_read(self, s: socket.socket, screen): screen.nbgl.hal_draw_image(data) elif tag == SephTag.NBGL_DRAW_IMAGE_RLE: - self.ocr.analyze_bitmap(data) + self.ocr.analyze_bitmap_nbgl(data) screen.nbgl.hal_draw_image_rle(data) elif tag == SephTag.NBGL_DRAW_IMAGE_FILE: