From 7fffd08d798cefd9c62d0897fa5f60015a21da3b Mon Sep 17 00:00:00 2001
From: Yanick Fratantonio
Date: Tue, 21 Jan 2025 16:54:53 +0000
Subject: [PATCH] gh workflow: add poc to show different onnxruntime results on windows

Add a workflow (manual dispatch or pull request) that installs
magika 0.6.0rc3 on ubuntu, macos and windows runners, runs inference on
tests_data/basic/python/code.py, and prints a hex dump (windows, ubuntu)
and the SHA-256 of that file.

Add python/scripts/extract_features.py, which prints the extracted
features, a digest of them, and the raw byte counts of the same file.
---
Note: the blob ids on the "index" lines below predate the edits in this
revision of the patch and were not regenerated.

 .../onnxruntime-windows-diff-poc.yml          | 49 ++++++++++++++++++++
 python/scripts/extract_features.py            | 50 +++++++++++++++++++++
 2 files changed, 99 insertions(+)
 create mode 100644 .github/workflows/onnxruntime-windows-diff-poc.yml
 create mode 100644 python/scripts/extract_features.py

diff --git a/.github/workflows/onnxruntime-windows-diff-poc.yml b/.github/workflows/onnxruntime-windows-diff-poc.yml
new file mode 100644
index 00000000..86da7679
--- /dev/null
+++ b/.github/workflows/onnxruntime-windows-diff-poc.yml
@@ -0,0 +1,49 @@
+# Runs the same magika inference and feature extraction on Linux, macOS and
+# Windows so the per-OS output can be compared in the job logs.
+name: POC for debugging windows onnxruntime diff
+
+on:
+  workflow_dispatch:
+  pull_request:
+
+permissions:
+  contents: read
+
+jobs:
+  run-inference:
+    strategy:
+      matrix:
+        python-version: ["3.12"]
+        os: [ "ubuntu-latest", "macos-latest", "windows-latest" ]
+    runs-on: ${{ matrix.os }}
+    steps:
+      - uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # pin@v4
+      - name: Setup Python
+        uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # pin@v5
+        with:
+          python-version: '${{ matrix.python-version }}'
+      - name: Install magika with pip
+        run: python3 -m pip install magika==0.6.0rc3
+      # Identify a known sample file and print the result.
+      - run: python3 -c "import magika, pathlib; m = magika.Magika(); res = m.identify_path(pathlib.Path('tests_data/basic/python/code.py')); print(res)"
+      # Print the extracted features and their digest.
+      - run: python3 ./python/scripts/extract_features.py
+
+      # Hex-dump the sample file (only Windows and Ubuntu steps are defined).
+      - if: matrix.os == 'windows-latest'
+        shell: pwsh
+        run: |
+          Format-Hex tests_data/basic/python/code.py
+      - if: matrix.os == 'ubuntu-latest'
+        run: hexdump -C tests_data/basic/python/code.py
+
+      # Print the SHA-256 of the sample file with each platform's own tool.
+      - if: matrix.os == 'windows-latest'
+        shell: pwsh
+        run: |
+          Get-FileHash tests_data/basic/python/code.py
+      - if: matrix.os == 'ubuntu-latest'
+        run: sha256sum tests_data/basic/python/code.py
+      - if: matrix.os == 'macos-latest'
+        run: shasum -a256 tests_data/basic/python/code.py
+
diff --git a/python/scripts/extract_features.py b/python/scripts/extract_features.py
new file mode 100644
index 00000000..f506b221
--- /dev/null
+++ b/python/scripts/extract_features.py
@@ -0,0 +1,50 @@
+"""Print magika's extracted features and raw-byte facts for a sample file.
+
+The output is meant to be compared across operating systems.
+"""
+
+import hashlib
+from pathlib import Path
+
+import magika
+
+test_path = Path(__file__).parent.parent.parent / "tests_data/basic/python/code.py"
+
+m = magika.Magika()
+
+# NOTE: _extract_features_from_path is underscore-prefixed (non-public), so it
+# may change between magika releases.
+fs = m._extract_features_from_path(
+    test_path,
+    beg_size=1024,
+    mid_size=0,
+    end_size=1024,
+    padding_token=256,
+    block_size=4096,
+    use_inputs_at_offsets=False,
+)
+
+
+def serialize(fs):
+    """Return the SHA-256 hex digest of the string form of ``fs.beg + fs.end``."""
+    return hashlib.sha256(str(fs.beg + fs.end).encode("ascii")).hexdigest()
+
+
+print(fs)
+print(serialize(fs))
+
+# Read in binary mode so no newline translation is applied; the context
+# manager closes the handle even if the read fails.
+with open(test_path, "rb") as f:
+    content_bytes = f.read()
+print(f"content bytes len: {len(content_bytes)}")
+
+# Same bytes through pathlib, as a cross-check of the read above.
+content_bytes_2 = test_path.read_bytes()
+print(f"content bytes 2 len: {len(content_bytes_2)}")
+
+print(f"file size: {test_path.stat().st_size}")
+
+# First 64 bytes as integers (iterating bytes yields ints).
+content_bytes_ints = list(content_bytes[:64])
+print(content_bytes_ints)