Skip to content

Commit

Permalink
Merge branch 'master' into maintenance/alexsherstinsky/use_defined_constants_and_unquoted_typehints-2023_10_15-10
Browse files Browse the repository at this point in the history
  • Loading branch information
alexsherstinsky committed Oct 16, 2023
2 parents 12d0f55 + fa94f4d commit 3b78b5e
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 16 deletions.
4 changes: 2 additions & 2 deletions ludwig/schema/features/preprocessing/text.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ class BaseTextPreprocessingConfig(BasePreprocessingConfig):
)

lowercase: bool = schema_utils.Boolean(
default=True,
default=False,
description="If true, converts the string to lowercase before tokenizing.",
parameter_metadata=FEATURE_METADATA[TEXT][PREPROCESSING]["lowercase"],
)
Expand Down Expand Up @@ -205,7 +205,7 @@ class TextOutputPreprocessingConfig(BaseTextPreprocessingConfig):
)

lowercase: bool = schema_utils.Boolean(
default=True,
default=False,
description="If true, converts the string to lowercase before tokenizing.",
parameter_metadata=FEATURE_METADATA[TEXT][PREPROCESSING]["lowercase"],
)
Expand Down
20 changes: 10 additions & 10 deletions tests/integration_tests/test_preprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -937,17 +937,17 @@ def test_fill_with_mode_different_df_engine(tmpdir, csv_filename, df_engine, ray
You are a helpful chatbot. USER: {__sample__}: {country}, {year:.2f} ASSISTANT:
"""

expected_task_sample = """instruction: predict the output feature. return only values in {true, false}
expected_task_sample = """Instruction: predict the output feature. Return only values in {true, false}
###
examples:
Examples:
###
input: foo bar
output: true
Input: foo bar
Output: true
###
input: baz quc
output: false
Input: baz quc
Output: false
###
input:"""
Input:"""


@pytest.mark.llm
Expand All @@ -973,7 +973,7 @@ def test_fill_with_mode_different_df_engine(tmpdir, csv_filename, df_engine, ray
category_feature(name="country"),
number_feature(name="year"),
],
("you are a helpful chatbot. user: "),
("You are a helpful chatbot. USER: "),
),
],
ids=["task_sample", "multi_col"],
Expand All @@ -988,7 +988,7 @@ def test_prompt_template(input_features, expected, model_type, backend, tmpdir,
data_df = pd.read_csv(data_csv)
raw_values = [data_df[input_features[i][COLUMN]].values.tolist() for i in range(len(input_features))]

# Only use the first input featuere (text) and discard the others, which are only used for data gen
# Only use the first input feature (text) and discard the others, which are only used for data gen
input_features = input_features[:1]
config = {
MODEL_TYPE: model_type,
Expand Down Expand Up @@ -1032,7 +1032,7 @@ def test_prompt_template(input_features, expected, model_type, backend, tmpdir,
# Test formatting in parametrize uses 2 decimal places of precision
raw_text = f"{v:.2f}"
else:
raw_text = str(v).lower()
raw_text = str(v)
assert raw_text in decoded, f"'{raw_text}' not in '{decoded}'"


Expand Down
8 changes: 4 additions & 4 deletions tests/ludwig/utils/test_strings_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ def test_create_vocabulary_chars():
)
vocab = vocabulary.vocab

assert len(vocab) == 24
assert len(vocab) == 27
assert vocab[strings_utils.SpecialSymbol.START.value] == strings_utils.START_SYMBOL
assert vocab[strings_utils.SpecialSymbol.STOP.value] == strings_utils.STOP_SYMBOL
assert vocab[strings_utils.SpecialSymbol.PADDING.value] == strings_utils.PADDING_SYMBOL
Expand Down Expand Up @@ -231,13 +231,13 @@ def test_create_vocabulary_idf(compute_idf: bool):

# "sentence" and "and" should be next, as they appear in two docs each
assert idf_sorted[1][0] > idf_sorted[0][0]
assert idf_sorted[1][1] == {"sentence", "and"}
assert idf_sorted[1][1] == {"sentence", "And"}

# finally, every token that only appears once
assert idf_sorted[2][0] > idf_sorted[1][0]
assert idf_sorted[2][1] == {
",",
"i",
"I",
"'",
"one",
"very",
Expand All @@ -246,7 +246,7 @@ def test_create_vocabulary_idf(compute_idf: bool):
"m",
"!",
"last",
"hello",
"Hello",
"a",
"another",
}

0 comments on commit 3b78b5e

Please sign in to comment.