From 6c1ad8c9609e7a86d74152f6a4daad42438ad476 Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Fri, 15 Mar 2024 11:59:03 +0000 Subject: [PATCH] wip --- narwhals/pandas_like/utils.py | 1 - t.py | 180 +++++++++++++++++----------------- 2 files changed, 89 insertions(+), 92 deletions(-) diff --git a/narwhals/pandas_like/utils.py b/narwhals/pandas_like/utils.py index 65c9b59cb..cd324b03e 100644 --- a/narwhals/pandas_like/utils.py +++ b/narwhals/pandas_like/utils.py @@ -114,7 +114,6 @@ def evaluate_into_expr(df: PandasDataFrame, into_expr: IntoExpr) -> list[PandasS """ Return list of raw columns. """ - expr = parse_into_expr(df._implementation, into_expr) return expr._call(df) diff --git a/t.py b/t.py index ff1a81042..6e4414887 100644 --- a/t.py +++ b/t.py @@ -5,123 +5,121 @@ import narwhals as nw -df_raw = pd.DataFrame({"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]}) -df = nw.DataFrame(df_raw, is_lazy=True) -df_raw_2 = pd.DataFrame({"a": [1, 3], "c": [7, 9]}) -df2 = nw.DataFrame(df_raw_2, is_lazy=True) +# df_raw = pd.DataFrame({"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]}) +# df = nw.DataFrame(df_raw, is_lazy=True) +# df_raw_2 = pd.DataFrame({"a": [1, 3], "c": [7, 9]}) +# df2 = nw.DataFrame(df_raw_2, is_lazy=True) -result = df.sort("a", "b") -print(nw.to_native(result)) +# result = df.sort("a", "b") +# print(nw.to_native(result)) -result = df.filter(nw.col("a") > 1) -print(nw.to_native(result)) +# result = df.filter(nw.col("a") > 1) +# print(nw.to_native(result)) -result = df.with_columns( - c=nw.col("a") + nw.col("b"), - d=nw.col("a") - nw.col("a").mean(), -) -print(nw.to_native(result)) -result = df.with_columns(nw.all() * 2) -print(nw.to_native(result)) +# result = df.with_columns( +# c=nw.col("a") + nw.col("b"), +# d=nw.col("a") - nw.col("a").mean(), +# ) +# print(nw.to_native(result)) +# result = df.with_columns(nw.all() * 2) +# print(nw.to_native(result)) -result = df.with_columns(horizonal_sum=nw.sum_horizontal(nw.col("a"), nw.col("b"))) -print(nw.to_native(result)) -result = df.with_columns(horizonal_sum=nw.sum_horizontal("a", nw.col("b"))) -print(nw.to_native(result)) +# result = df.with_columns(horizonal_sum=nw.sum_horizontal(nw.col("a"), nw.col("b"))) +# print(nw.to_native(result)) +# result = df.with_columns(horizonal_sum=nw.sum_horizontal("a", nw.col("b"))) +# print(nw.to_native(result)) -result = df.select(nw.all().sum()) -print(nw.to_native(result)) -result = df.select(nw.col("a", "b") * 2) -print(nw.to_native(result)) +# result = df.select(nw.all().sum()) +# print(nw.to_native(result)) +# result = df.select(nw.col("a", "b") * 2) +# print(nw.to_native(result)) -# # TODO! -# # result = ( -# # df.collect() -# # .group_by("b") -# # .agg( -# # nw.all().sum(), -# # ) -# # ) -# # print(nw.to_native(result)) - -result = ( - df.collect() - .group_by("b") - .agg( - nw.col("a").sum(), - simple=nw.col("a").sum(), - complex=(nw.col("a") + 1).sum(), - other=nw.sum("a"), - ) -) -print(nw.to_native(result)) -print("multiple simple") -result = ( - df.collect() - .group_by("b") - .agg( - nw.col("a", "z").sum(), - ) -) -print(nw.to_native(result)) +# # # TODO! +# # # result = ( +# # # df.collect() +# # # .group_by("b") +# # # .agg( +# # # nw.all().sum(), +# # # ) +# # # ) +# # # print(nw.to_native(result)) + +# result = ( +# df.collect() +# .group_by("b") +# .agg( +# nw.col("a").sum(), +# simple=nw.col("a").sum(), +# complex=(nw.col("a") + 1).sum(), +# other=nw.sum("a"), +# ) +# ) +# print(nw.to_native(result)) +# print("multiple simple") +# result = ( +# df.collect() +# .group_by("b") +# .agg( +# nw.col("a", "z").sum(), +# ) +# ) +# print(nw.to_native(result)) -result = df.join(df2, left_on="a", right_on="a") -print(nw.to_native(result)) +# result = df.join(df2, left_on="a", right_on="a") +# print(nw.to_native(result)) -result = df.rename({"a": "a_new", "b": "b_new"}) -print(nw.to_native(result)) +# result = df.rename({"a": "a_new", "b": "b_new"}) +# print(nw.to_native(result)) -result = df.collect().to_dict() -print(result) -print(polars.from_pandas(nw.to_native(df)).to_dict()) +# result = df.collect().to_dict() +# print(result) +# print(polars.from_pandas(nw.to_native(df)).to_dict()) -result = df.collect().to_dict(as_series=False) -print("this") -print(result) -print("that") -print(polars.from_pandas(nw.to_native(df)).to_dict(as_series=False)) +# result = df.collect().to_dict(as_series=False) +# print("this") +# print(result) +# print("that") +# print(polars.from_pandas(nw.to_native(df)).to_dict(as_series=False)) -agg = (nw.col("b") - nw.col("z").mean()).mean() -print(nw.to_native(df.with_columns(d=agg))) -result = df.group_by("a").agg(agg) -print(nw.to_native(result)) +# agg = (nw.col("b") - nw.col("z").mean()).mean() +# print(nw.to_native(df.with_columns(d=agg))) +# result = df.group_by("a").agg(agg) +# print(nw.to_native(result)) -print(nw.col("a") + nw.col("b")) -print(nw.col("a", "b").sum()) +# print(nw.col("a") + nw.col("b")) +# print(nw.col("a", "b").sum()) -result = df.select(nw.col("a", "b").sum()) -print(nw.to_native(result)) +# result = df.select(nw.col("a", "b").sum()) +# print(nw.to_native(result)) -print(df.schema) +# print(df.schema) # print(df.schema['a'].is_numeric()) -# df_raw = pd.DataFrame({ -# "a": [1, 3, 2], -# "b": [4., 4, 6], -# 'c': ['a', 'b', 'c'], -# 'd': [True, False, True], -# }) -# df, pl = narwhals.to_polars_api(df_raw) +df_raw = pd.DataFrame( + { + "a": [1, 3, 2], + "b": [4.0, 4, 6], + "c": ["a", "b", "c"], + "d": [True, False, True], + } +) +df = nw.DataFrame(df_raw) # print(df.schema) # print(df.schema['a'].is_numeric()) # print(df.schema['b'].is_numeric()) # print(df.schema['c'].is_numeric()) # print(df.schema['d'].is_numeric()) -# result = df.with_columns(nw.col('a').cast(pl.Float32)) +# result = df.with_columns(nw.col('a').cast(nw.Float32)) # print(nw.to_native(result)) -# print(result._dataframe.dtypes) +# print(result._dataframe._dataframe.dtypes) # print(df.schema) -# result = df.select([col for (col, dtype) in df.schema.items() if dtype == pl.Float64]) -# print(nw.to_native(result)) -# print(result._dataframe.dtypes) - -# print(nw.all() + nw.col("a")) -# result = df.select(nw.all() + nw.col("a")) +# result = df.select([col for (col, dtype) in df.schema.items() if dtype == nw.Float64]) # print(nw.to_native(result)) -# print(result._dataframe.dtypes) +# print(result._dataframe._dataframe.dtypes) -# print(result.collect()) +result = df.select("a", "b").select(nw.all() + nw.col("a")) +print(nw.to_native(result))