Skip to content

Commit

Permalink
Update tokenizer to split on whitespace
Browse files (browse the repository at this point in the history)
  • Loading branch information
DeanEby committed Feb 7, 2025
1 parent e5da070 commit cd7328e
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 3 deletions.
1 change: 0 additions & 1 deletion analyzers/hashtags/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,6 @@ def gini(x):


def main(context: PrimaryAnalyzerContext):

input_reader = context.input()
df_input = input_reader.preprocess(pl.read_parquet(input_reader.parquet_path))

Expand Down
8 changes: 6 additions & 2 deletions importing/csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,9 @@ def _separator_option(previous_value: Optional[str]) -> Optional[str]:
default=(
previous_value
if previous_value in [",", ";", "\t"]
- else "other" if previous_value is not None else None
+ else "other"
+ if previous_value is not None
+ else None
),
)
if input is None:
Expand All @@ -133,7 +135,9 @@ def _quote_char_option(previous_value: Optional[str]) -> Optional[str]:
default=(
previous_value
if previous_value in ['"', "'"]
- else "other" if previous_value is not None else None
+ else "other"
+ if previous_value is not None
+ else None
),
)
if input is None:
Expand Down

0 comments on commit cd7328e

Please sign in to comment.