docs: add pandas snippets
niekdt committed Sep 27, 2024
1 parent cddb7b9 commit b0c2b6a
Showing 2 changed files with 122 additions and 7 deletions.
80 changes: 76 additions & 4 deletions src/actionsheets/data/python/pandas/pandas.dataframe.toml
@@ -155,7 +155,7 @@ action = "Contains no NA"
code = "data.notna().all().all()"

[test.contains.na.col]
action = "Does column _col_ contain NA value(s)"
action = "Does column _col_ contain any NA values"
code = "data[col].hasnan"

[test.contains.na.col.none]
@@ -166,6 +166,26 @@ code = "~data[col].hasnans"
action = "Do none of the columns _cols_ contain NA values"
code = "data[['col1', 'col2']].notnull().all().all()"

[test.contains.'inf']
action = "Contains any infinity"
code = """
import numpy as np
data.isin([np.inf, -np.inf]).values.any()
"""

[test.contains.'inf'.col]
action = "Does column _col_ contain any infinite values"
code = """
import numpy as np
data['col'].isin([np.inf, -np.inf]).any()
"""

[test.contains.'inf'.col.2]
code = """
import numpy as np
np.isinf(data['col']).any()
"""


[test.duplicate]
section = "Tests for duplicates"
@@ -265,6 +285,11 @@ code = "data.duplicated()"
action = "Mask for duplicates across columns in list _cols_"
code = "data.duplicated(subset=cols)"

[extract.row.count.group]
action = "Extract row count by group _group_"
code = "data.groupby('group').size().reset_index(name='count')"
details = "Returns a data frame with group columns and a respective count column"


[query]
section = "Query"
@@ -360,6 +385,10 @@ details = "Overwrites existing column"
action = "Append undefined categorical column _col_"
code = "data['col'] = pd.Categorical([None] * len(data), categories=['a', 'b', 'c'])"

[update.grow.col.append.str.concat]
action = "Add string column _col_ based on concatenation of two string columns _x_ and _y_, with separator _sep_"
code = "data['col'] = data['x'] + 'sep' + data['y']"

[update.grow.rownum]
action = "Add column with row number (ignore index)"
code = "data['num'] = range(len(data))"
@@ -430,6 +459,12 @@ code = "pd.concat([data, s1, s2], axis=1)"
action = "Insert column"
code = "data.insert(x)"

[derive.grow.col.str.split]
action = "Split string column _col_ into two columns based on separator _sep_"
code = """
data['col'].str.split('sep', n=1, expand=True)
"""


[derive.shrink]
section = "Shrink"
Expand All @@ -441,14 +476,40 @@ section = "Reduce number of rows"
action = "Keep first _n_ rows"
code = "data.head(n)"

[derive.shrink.row.remove.head]
action = "Remove first _n_ rows"
code = "data[n:]"

[derive.shrink.row.tail]
action = "Keep last _n_ rows"
code = "data.tail(n)"

[derive.shrink.row.remove.tail]
action = "Remove last _n_ rows"
code = "data[:-n]"

[derive.shrink.row.pop]
action = "Pop last row (return it and remove it)"
code = """
row = data.iloc[-1]
data = data.iloc[:-1]
"""

[derive.shrink.row.largest]
action = "Select _n_ largest rows according to column _col_"
code = "data.nlargest(n, col)"

[derive.shrink.row.largest.group]
action = "Select _n_ largest rows per group _group_, according to column _col_"
code = "data.groupby('group').apply(lambda x: x.nlargest(n=n, columns='col')).reset_index(drop=True))"

[derive.shrink.row.last.group]
action = "Select last row per group _group_"
code = "data.groupby('group').last().reset_index()"

[derive.shrink.row.max.group]
action = "Select the row per group _group_ with the largest value for column _col_"
code = """
data.loc[data.groupby('group')['col'].idxmax()].reset_index(drop=True)
"""

[derive.shrink.row.remove.list]
action = "Remove rows list _rows_"
code = "data.drop(rows)"
@@ -525,12 +586,21 @@ section = "Combine"
[derive.combine.concat]
action = "Concatenate rows of dataframes"
code = "pd.concat([df, df2, dfN])"
details = "Consider `ignore_index=True`"
details = "Consider `ignore_index=True` argument"

[derive.combine.concat.mix]
action = "Concatenate rows of dataframes, having partially overlapping columns"
code = "?"

[derive.combine.concat.cols]
action = "Concatenate columns of dataframes, assuming equal index, and assuming columns don't overlap"
code = "pd.concat([data, data2], axis=1)"
details = "Unsafe. First make sure indexes are aligned, or output has twice the number of rows"

[derive.combine.concat.cols.safe]
action = "Concatenate columns of dataframes, ignoring the index, and assuming columns don't overlap"
code = "pd.concat([data.reset_index(drop=True), data2.reset_index(drop=True)], axis=1)"

[derive.combine.merge.inner]
action = "Inner join"
code = "data.merge(data2, on=['sex', 'country'])"
Expand Down Expand Up @@ -622,11 +692,13 @@ section = "To file format"

[convert.file.csv]
action = "To CSV file"
code = "data.to_csv('file.csv')"
code = "data.to_csv('file.csv', index=False)"
details = "`index=False` is needed not to polute CSV with a meaningless index"

[convert.file.tsv]
action = "To TSV file"
code = "data.to_table('file.tsv')"
code = "data.to_csv('file.tsv', sep='\t', index=False)"
details = "`index=False` is needed not to polute TSV with a meaningless index"

[convert.file.json]
action = "To JSON file"
49 changes: 46 additions & 3 deletions src/actionsheets/data/python/pandas/pandas.series.toml
@@ -20,9 +20,40 @@ code = "import pandas as pd"
section = "Create"

[create.empty]
section = "Create empty series"

[create.empty.object]
action = "Empty series (of object type)"
code = "pd.Series()"

[create.empty.bool]
action = "Empty boolean series"
code = "pd.Series(dtype=bool)"

[create.empty.category.undefined]
action = "Empty categorical series without defined categories"
code = "pd.Series(dtype='category')"

[create.empty.category]
action = "Empty categorical series with pre-defined categories"
code = "pd.Categorical([], categories=['a', 'b', 'c'])"

[create.empty.int]
action = "Empty int series"
code = "pd.Series(dtype=int)"

[create.empty.float]
action = "Empty float series"
code = "pd.Series(dtype=float)"

[create.empty.date]
action = "Empty datetime series"
code = "pd.Series(dtype='datetime64[ns]')"


[create.constant]
section = "Create series of constant values"

[create.constant.na]
action = "Series filled with NAs of length _n_ (of object type)"
code = "pd.Series([None] * n)"
@@ -31,9 +62,13 @@ code = "pd.Series([None] * n)"
action = "Constant value _v_ of length _n_"
code = "pd.Series(v, index=range(n))"

- [create.list]
- action = "From list"
- code = "pd.Series([1, 2, 3, 7])"

[create.define]
section = "Create series from a list of values"

[create.define.list]
action = "Object series from a generic list of values"
code = "pd.Series([1, None, 'a'])"

[create.define.int]
action = "Int series from a list of integers"
@@ -256,6 +291,14 @@ code = "x.min()"
action = "Greatest value, ignoring NAs"
code = "x.max()"

[extract.min.index]
action = "Index of the smallest value, ignoring NAs"
code = "x.idxmin()"

[extract.max.index]
action = "Index of the greatest value, ignoring NAs"
code = "x.idxmax()"

[extract.value.counts]
action = "Count occurrence per value"
code = "x.value_counts()"
