Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Bug] "Exception: vector" crash in pattern language #2108

Open
1 task
berkus opened this issue Feb 11, 2025 · 23 comments
Open
1 task

[Bug] "Exception: vector" crash in pattern language #2108

berkus opened this issue Feb 11, 2025 · 23 comments
Labels
bug Something isn't working

Comments

@berkus
Copy link

berkus commented Feb 11, 2025

Operating System

MacOS

What's the issue you encountered?

Image

This is all I get, how can I go about debugging my pattern code?

How can the issue be reproduced?

Run this script

import std.mem;
import std.io;

#pragma loop_limit 1000000

//==============================================================================
// Decompression routine
//==============================================================================

// A run of `Size` raw bytes. decompress() places this over the input (at `$`)
// or over the output section to obtain a value it can hand to
// std::mem::copy_value_to_section(). The type carries the [[hidden]] attribute.
struct Subrange<auto Size> {
    u8 data[Size];
} [[hidden]];

// Decodes a control-byte driven compressed stream, starting at the current
// cursor `$`, into the section `output`.
//
// Parameters:
//   output - destination section; literal runs and back-references are written
//            to it with std::mem::copy_value_to_section(), and back-references
//            are also read from it (`... in output`).
//   size   - expected decompressed size; only consulted after the main loop,
//            to decide whether the trailing literal bytes are still copied.
//
// The function advances `$` by hand as it consumes input and keeps the write
// position inside `output` in `datapos`. It returns nothing.
//
// NOTE(review): every branch condition below is written `inbyte & MASK == 0`
// without parentheses. This only tests the mask if `&` binds tighter than `==`
// in the pattern language (the opposite of C/C++) - confirm against the
// language reference, or parenthesize.
fn decompress(auto output, u24 size) {

    // std::mem::copy_value_to_section(auto value, std::mem::Section to_section, u64 to_address);
    // std::mem::copy_value_to_section(h, decompressed, 0x0);

    // FC, FD, FE, FF - end marker (low 3 bits - uncompressed remainder to read)
    // 80 - read up to 3 bytes raw (low 3 bits), then lookup 1 byte offset ago with 1 byte len
    // 40 - read up to 3 bytes raw (low 3 bits), then lookup 2 byte offset ago with 1 byte len
    // 20 - read up to 3 bytes raw (low 3 bits), then lookup 3 byte offset ago with 1 byte len
    // otherwise - read up to 1 byte len uncompressed bytes from input

    // Initialization
    // unsigned int datapos, len, offset, inbyte, tmp1, tmp2, tmp3;
    // unsigned char *srcpos, *dstpos;
    // datapos = len = offset = inbyte = tmp1 = tmp2 = tmp3 = 0;
    //std::mem::read_unsigned($, 5);

    // Debug output: cursor position on entry.
    std::print("{}", $);
    
    // Fetch the first control byte and step past it.
    u8 inbyte = std::mem::read_unsigned($, 1); $ += 1;
    std::print("=> 0x{:02x}", inbyte);
    // Write position inside `output`.
    u32 datapos = 0;

    // Decompress
    // Runs until the input is exhausted or a control byte >= 0xFC is read.
    while ((!std::mem::eof()) && (inbyte < 0xFC))
    {
        // Debug output: one line per control byte (the first byte is therefore printed twice).
        std::print("=> 0x{:02x}", inbyte);
        if (inbyte & 0x80 == 0)
        {
            // Command with one extra byte: the low byte of the back-reference offset.
            u8 offset_lo = std::mem::read_unsigned($, 1); $ += 1;
            // Low two bits of the control byte: number of literal bytes (0..3) to copy first.
            u8 len = inbyte & 0x03;
            if (len != 0)
            {
                Subrange<len> input_data @ $;
                std::mem::copy_value_to_section(input_data, output, datapos);
                datapos += len;
                $ += len;
            }
            // Bits 2..4 of the control byte give the back-reference length minus 3.
            u32 dict_len = ((inbyte & 0x1C) >> 2) + 3;
            // Bits 5 and up of the control byte form the high part of the offset.
            u32 offset = ((inbyte >> 5) << 8) + offset_lo + 1;
            // NOTE(review): nothing checks offset <= datapos before the subtraction,
            // and the source range is read in one piece even if it reaches past
            // `datapos` - confirm both are acceptable for the format.
            Subrange<dict_len> dict_data @ datapos - offset in output;
            std::mem::copy_value_to_section(dict_data, output, datapos);
            datapos += dict_len;
        }
        else if (inbyte & 0x40 == 0)
        {
            // Command with two extra bytes.
            u8 offset_hi = std::mem::read_unsigned($, 1); $ += 1;
            u8 offset_lo = std::mem::read_unsigned($, 1); $ += 1;
            // Here the literal count comes from the top two bits of the first extra byte.
            u8 len = (offset_hi >> 6) & 0x03;
            if (len != 0)
            {
                Subrange<len> input_data @ $;
                std::mem::copy_value_to_section(input_data, output, datapos);
                datapos += len;
                $ += len;
            }
            // Low six bits of the control byte give the back-reference length minus 4.
            u32 dict_len = (inbyte & 0x3F) + 4;
            // Remaining six bits of offset_hi are the high part of the offset.
            u32 offset = ((offset_hi & 0x3F) * 256) + offset_lo + 1;
            // NOTE(review): same unchecked `datapos - offset` as in the first branch.
            Subrange<dict_len> dict_data @ datapos - offset in output;
            std::mem::copy_value_to_section(dict_data, output, datapos);
            datapos += dict_len;
        }
        else if (inbyte & 0x20 == 0)
        {
            // Command with three extra bytes.
            u8 offset_hi = std::mem::read_unsigned($, 1); $ += 1;
            u8 offset_lo = std::mem::read_unsigned($, 1); $ += 1;
            // NOTE(review): unlike every other read_unsigned() in this function,
            // this read is not followed by `$ += 1`, so the literal bytes and the
            // next control byte are fetched starting at len_lo itself - confirm
            // whether the cursor advance is missing.
            u8 len_lo = std::mem::read_unsigned($, 1);
            // Low two bits of the control byte: number of literal bytes (0..3) to copy first.
            u8 len = inbyte & 0x03;
            if (len != 0)
            {
                Subrange<len> input_data @ $;
                std::mem::copy_value_to_section(input_data, output, datapos);
                datapos += len;
                $ += len;
            }
            // Bits 2..3 of the control byte are the high part of the length; minimum value is 5.
            u32 dict_len = (((inbyte >> 2) & 0x03) * 256) + len_lo + 5;
            // NOTE(review): this guard tests `len` (the literal count), not
            // `dict_len`, so the back-reference is skipped whenever no literal
            // bytes precede it. dict_len itself is always at least 5 - confirm
            // which condition is intended.
            if (len != 0)
            {
                // Bit 4 of the control byte becomes bit 16 of the offset.
                u32 offset = ((inbyte & 0x10) << 0x0C) + (offset_hi * 256) + offset_lo + 1;
                // NOTE(review): same unchecked `datapos - offset` as in the first branch.
                Subrange<dict_len> dict_data @ datapos - offset in output;
                std::mem::copy_value_to_section(dict_data, output, datapos);
                datapos += dict_len;
            }
        }
        else
        {
            // Literal-only command: copy ((low five bits) * 4) + 4 bytes from the input.
            u8 len = ((inbyte & 0x1F) * 4) + 4;
            // `len` is at least 4 here, so this guard always passes.
            if (len != 0)
            {
                Subrange<len> input_data @ $;
                std::mem::copy_value_to_section(input_data, output, datapos);
                datapos += len;
                $ += len;
            }
        }

        // Fetch the next control byte and step past it.
        inbyte = std::mem::read_unsigned($, 1); $ += 1;
    }

    // After the loop: if input remains and the output is not yet full, the low
    // two bits of the last control byte give a final literal run of 0..3 bytes.
    if (!std::mem::eof() && (datapos < size))
    {
        u8 len = inbyte & 0x03;
        if (len != 0)
        {
            Subrange<len> input_data @ $;
            std::mem::copy_value_to_section(input_data, output, datapos);
            datapos += len;
            $ += len;
        }
    }
};

//==============================================================================
// Main structure
//==============================================================================

// Body of a compressed file, placed right after the compression marker.
// Reads the 24-bit uncompressed size, decompresses the rest of the input into
// a freshly created section and registers the result as a virtual file.
struct Compressed {
    // Debug output: cursor before the size field.
    std::print("3 {}", $);
    u24 uncompressedSize;
    //u8 compressedContents[std::mem::size() - 5] @ 0x5;

    // Debug output: cursor after the size field, and the size that was read.
    std::print("4 {}", $);
    std::print("uncomp size {}", uncompressedSize);
    // Destination section for the decompressed bytes, sized up front.
    std::mem::Section decompressed = std::mem::create_section("LZ-like decompressed");
    std::mem::set_section_size(decompressed, uncompressedSize);

    // Decoding starts at the current cursor, i.e. directly after uncompressedSize.
    decompress(decompressed, uncompressedSize);

    // View the whole section as a byte array and pass it to add_virtual_file()
    // under the name "uncompressed-<file_name>".
    u8 d[uncompressedSize] @ 0x00 in decompressed;
    builtin::hex::core::add_virtual_file(std::format("uncompressed-{}", hex::prv::get_information("file_name")), d);
    std::warning("This CRP is compressed, grab the uncompressed save from\nthe Virtual Filesystem tab and use this pattern on it again.");
};

// Entry structure: reads the 16-bit marker at offset 0x0 and, when it equals
// 0xFB10, places a Compressed structure at offset 0x2.
struct Main {
    // Debug output: cursor before and after the marker field.
    std::print("1 {}", $);
    u16 compressionMarker @ 0x0 [[hidden]];
    std::print("2 {}", $);
    if (compressionMarker == 0xFB10) {
        Compressed @ 0x2;
    }
} [[inline]];

// Instantiate the entry structure at the start of the file.
Main m @ 0x0;

ImHex Version

1.36.2

ImHex Build Type

  • Nightly or built from sources

Installation type

brew install imhex

Additional context?

  • I am not asking to help me debug the script
  • I am asking about how to debug the script given the lack of any useful information in the exception - how to even decipher "vector"?
@berkus berkus added the bug Something isn't working label Feb 11, 2025
@berkus
Copy link
Author

berkus commented Feb 11, 2025

With printf-debugging it is clear that some index gets negative, but that's not visible in the error message.

I: Second branch => load 0x01 from 0x00001102 to 0x00002808
I: Second branch => dict 0x04 from 0x-00000f2 to 0x00002809

@berkus
Copy link
Author

berkus commented Feb 11, 2025

With printf debugging ImHex completely choked one core and crawled to a near halt, after 15 seconds it doesn't even printf anymore.

@berkus
Copy link
Author

berkus commented Feb 11, 2025

slow-imhex.mov

@paxcut
Copy link
Contributor

paxcut commented Feb 11, 2025

In order to reproduce the error you are getting we will also need a sample input file to run the pattern on.

From your description it looks like your code is creating an exception when it runs. The exception has to do with the c++ vector class used internally to create data types.

Pattern language has no printf so you probably mean debugging using std::print().

If you use print() in a way that produces tons of output, it is possible to overwhelm your resources; avoid that by not printing everything everywhere.

Also use the pattern language debugger to some extent to help you determine the source of the error.

@berkus
Copy link
Author

berkus commented Feb 11, 2025

dealer2a.crp.zip

uncompress to dealer2a.crp and run the pattern script on it, I restored the bugs in the code so it will throw this exception again.

If ImHex unfreezes I will also copy the script with print() lines to show how badly it stalls.

@berkus
Copy link
Author

berkus commented Feb 11, 2025

I used the debugger, but it's really not very useful when the error happens on iteration 30,000 - i will literally die pressing continue 30,000 times.

@paxcut
Copy link
Contributor

paxcut commented Feb 11, 2025

You can also put an if statement that checks the iteration number and, inside it, a dummy statement where you can place the breakpoint.

@berkus
Copy link
Author

berkus commented Feb 11, 2025

Ideally, it should break into the debugger when a script exception is thrown (provided there is no try/catch).

@paxcut
Copy link
Contributor

paxcut commented Feb 11, 2025

there is also try{}catch{} statement that will take care of that.

@berkus
Copy link
Author

berkus commented Feb 11, 2025

But it doesn't let you use the debugger at the point where exception happened, exactly what I'm saying.

@paxcut
Copy link
Contributor

paxcut commented Feb 11, 2025

This is not an error in the pattern language. You are triggering an exception in the C++ code that runs the pattern language, and ImHex crashes when that happens. That should never happen regardless of the problems the script may have, but it cannot be debugged in the pattern language and needs to be fixed so it doesn't occur.

@berkus
Copy link
Author

berkus commented Feb 11, 2025

Okay, I believe the problem is the negative index, as I posted in my second message - could it be the scripting engine does not process that correctly?

Or maybe just an out of bounds index if it's treated as unsigned.

@berkus
Copy link
Author

berkus commented Feb 11, 2025

I did not manage to wait for the std::print() version to finish - it was getting slower and slower with each step, so I had to kill ImHex.

@paxcut
Copy link
Contributor

paxcut commented Feb 11, 2025

it is crashing when it tries to create the stack trace, so it isn't the negative index per se.

@WerWolv
Copy link
Owner

WerWolv commented Feb 11, 2025

The thing where large console messages would cause ImHex to slow down a lot was fixed a few days ago btw

@berkus
Copy link
Author

berkus commented Feb 11, 2025

The thing where large console messages would cause ImHex to slow down a lot was fixed a few days ago btw

Oh, awesome!

@berkus
Copy link
Author

berkus commented Feb 11, 2025

it is crashing when it tries to create the stack trace, so it isn't the negative index per se.

So this seems like an actual bug that needs to be fixed?

@paxcut
Copy link
Contributor

paxcut commented Feb 11, 2025

The error is caused by this code I think:

          if (len != 0)
            {
                u32 offset = ((inbyte & 0x10) << 0x0C) + (offset_hi * 256) + offset_lo + 1;
                Subrange<dict_len> dict_data @ datapos - offset in output;
                std::mem::copy_value_to_section(dict_data, output, datapos);
                datapos += dict_len;
            }

that probably should be if (dict_len !=0)

@berkus
Copy link
Author

berkus commented Feb 11, 2025

No, this was fixed (if you look at the message edits), but the vector error remained. The bug is on this line:

u8 len_lo = std::mem::read_unsigned($, 1);

@berkus
Copy link
Author

berkus commented Feb 11, 2025

As I said in the OP, I do not need help debugging the script itself, I fixed it and it works.

I need help with general debuggability of the script writing process where errors "Exception: vector" are not extremely helpful to diagnose what has gone wrong.

@paxcut
Copy link
Contributor

paxcut commented Feb 11, 2025

Maybe I wasn't clear enough about stating the fact that this is an internal bug in imhex that cannot be debugged from pattern language. When errors that can be debugged occur, ImHex does not crash and you get a message indicating what went wrong. In this case imhex crashes so it is impossible to fix the problem by debugging the script. It may help, but the bug needs to be fixed in the source code of pattern language.

@berkus
Copy link
Author

berkus commented Feb 11, 2025

Great, and this is what this ticket is about, thanks!

@berkus berkus changed the title [Bug] How to debug a pattern script? [Bug] "Exception: vector" crash in pattern language Feb 11, 2025
@paxcut
Copy link
Contributor

paxcut commented Feb 11, 2025

I know you don't need help debugging the code and that it is working for you, but I wanted to mention that I also got it working without changing the place you said is causing the error. What I did is make sure that only positive len and dict_lens are used in all the different cases.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
bug Something isn't working
Projects
None yet
Development

No branches or pull requests

3 participants