Skip to content

Commit

Permalink
Instrument import timings
Browse files Browse the repository at this point in the history
  • Loading branch information
alexander-beedie committed Nov 15, 2023
1 parent 4042f69 commit b1c04c1
Showing 1 changed file with 11 additions and 6 deletions.
17 changes: 11 additions & 6 deletions py-polars/tests/unit/test_polars_import.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,11 @@ def _import_timings() -> bytes:
# assemble suitable command to get polars module import timing;
# run in a separate process to ensure clean timing results.
cmd = f'{sys.executable} -X importtime -c "import polars"'
output = (
return (
subprocess.run(cmd, shell=True, capture_output=True)
.stderr.replace(b"import time:", b"")
.strip()
)
return output


def _import_timings_as_frame(n_tries: int) -> tuple[pl.DataFrame, int]:
Expand All @@ -56,9 +55,15 @@ def _import_timings_as_frame(n_tries: int) -> tuple[pl.DataFrame, int]:

import_timings.append(df_import)

# note: if a qualifying import time was already achieved, we won't get here
df_fastest_import = sorted(import_timings, key=_import_time_from_frame)[0]
return df_fastest_import, _import_time_from_frame(df_fastest_import)
# note: if a qualifying import time was already achieved, we won't get here.
# if we do get here, print all the failed timings to help diagnose what's going on:
import_times = [_import_time_from_frame(df) for df in import_timings]
msg = "\n".join(f"({idx}) {tm:,}μs" for idx, tm in enumerate(import_times))
min_max = f"Min => {min(import_times):,}μs, Max => {max(import_times):,}μs)"
print(f"\nImport times achieved over {n_tries} tries:\n{min_max}\n\n{msg}")

sorted_timing_frames = sorted(import_timings, key=_import_time_from_frame)
return sorted_timing_frames[0], min(import_times)


@pytest.mark.skipif(sys.platform == "win32", reason="Unreliable on Windows")
Expand All @@ -70,7 +75,7 @@ def test_polars_import() -> None:

# note: reduce noise by allowing up to 'n' tries (but return immediately if/when
# a qualifying time is achieved, so we don't waste time running unnecessary tests)
df_import, polars_import_time = _import_timings_as_frame(n_tries=5)
df_import, polars_import_time = _import_timings_as_frame(n_tries=10)

with pl.Config(
# get a complete view of what's going on in case of failure
Expand Down

0 comments on commit b1c04c1

Please sign in to comment.