def read_csv_with_comments(path: PathLike) -> pd.DataFrame:
    """Read a CSV file that may contain ``#`` comment lines.

    Two layouts are handled:

    * Comment rows that have the same number of fields as the data (so pandas
      can tokenize the file). Rows whose first field starts with ``"# "`` are
      removed and the first remaining row is promoted to the column header.
      Because the header is promoted after parsing, the returned values in
      this case are strings.
    * Comment rows that make pandas fail with a ``ParserError`` (typically
      several free-text lines before the real header). The file is re-read as
      text, lines starting with ``#`` and blank lines are dropped, and the
      remainder is parsed as a regular comma-separated file.

    Parameters
    ----------
    path : PathLike
        Path to the CSV file.

    Returns
    -------
    pd.DataFrame
        Parsed table without the comment rows.

    Raises
    ------
    pd.errors.ParserError
        If the file cannot be read or parsed; the original exception is
        attached as ``__cause__``.
    """
    import pandas as pd

    path = Path(path)
    try:
        try:
            df = pd.read_csv(path)
        except pd.errors.ParserError:
            from io import StringIO

            # Filter by content rather than by counting lines: slicing with
            # ``[start:-end]`` yields nothing when there are no blank lines
            # (``-0``) and removes the wrong rows when comments are interleaved.
            rows = [
                row
                for row in path.read_text().splitlines()
                if row.strip() and not row.startswith("#")
            ]
            return pd.read_csv(StringIO("\n".join(rows)), sep=",")

        first_col = df.iloc[:, 0]
        # Cast to str so numeric or NaN first columns do not raise; only values
        # that *start* with the comment marker are treated as comments.
        is_comment = first_col.astype(str).str.startswith("# ", na=False)
        if is_comment.any():
            df = df[~is_comment].reset_index(drop=True)
            # pandas consumed the first comment line as the header, so the real
            # header is now the first remaining row.
            df.columns = df.iloc[0]
            df = df.drop(df.index[0])
    except Exception as err:
        raise pd.errors.ParserError(f"Failed to parse grid '{path}'.") from err
    return df