Skip to content

Commit

Permalink
fix: crash if requested output field for inference doesn't exist in dataset (#624)
Browse files Browse the repository at this point in the history

* fix: crash if requested output field for inference doesn't exist in dataset
* refactor: overwrite column is a warning, update output with dataset content
* test: both empty and entirely missing ground-truth datasets
  • Loading branch information
ividal authored Jan 17, 2025
1 parent 80c51ff commit b894b29
Show file tree
Hide file tree
Showing 4 changed files with 78 additions and 6 deletions.
9 changes: 8 additions & 1 deletion lumigator/python/mzai/backend/backend/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,12 +94,19 @@ def dialog_dataset():


@pytest.fixture(scope="function")
def dialog_no_gt_dataset():
def dialog_empty_gt_dataset():
filename = common_resources_dir() / "sample_data" / "dialogsum_mini_empty_gt.csv"
with Path(filename).open("rb") as f:
yield f


@pytest.fixture(scope="function")
def dialog_no_gt_dataset():
    """Yield a binary file handle to the ``dialogsum_mini_no_gt.csv`` sample.

    The file is looked up under ``sample_data`` inside the directory returned
    by ``common_resources_dir()``. The handle is opened in ``"rb"`` mode and is
    closed when the fixture is torn down (on exit from the ``with`` block).
    """
    csv_location = Path(
        common_resources_dir() / "sample_data" / "dialogsum_mini_no_gt.csv"
    )
    with csv_location.open("rb") as csv_handle:
        yield csv_handle


@pytest.fixture(scope="session", autouse=True)
def db_engine():
"""Initialize a DB engine and create tables."""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -135,17 +135,20 @@ def test_upload_data_launch_job(
logger.info(f"#{logs_infer_job_response_model.logs}#")


@pytest.mark.parametrize("unnanotated_dataset", ["dialog_empty_gt_dataset", "dialog_no_gt_dataset"])
def test_upload_data_no_gt_launch_annotation(
request: pytest.FixtureRequest,
local_client: TestClient,
dialog_no_gt_dataset,
unnanotated_dataset,
simple_eval_template,
simple_infer_template,
dependency_overrides_services,
):
dataset = request.getfixturevalue(unnanotated_dataset)
create_response = local_client.post(
"/datasets/",
data={},
files={"dataset": dialog_no_gt_dataset, "format": (None, DatasetFormat.JOB.value)},
files={"dataset": dataset, "format": (None, DatasetFormat.JOB.value)},
)

assert create_response.status_code == 201
Expand Down
13 changes: 10 additions & 3 deletions lumigator/python/mzai/jobs/inference/inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,10 +109,17 @@ def run_inference(config: InferenceJobConfig) -> Path:
else:
raise NotImplementedError("Inference pipeline not supported.")

# run inference
# We keep any columns that were already there (not just the original input
# samples, but also past predictions under another column name)
output.update(dataset.to_dict())

# We are trusting the user: if the dataset already had a column with the output_field
# they selected, we overwrite it with the values from our inference.

if config.job.output_field in dataset.column_names:
logger.warning(f"Overwriting {config.job.output_field}")

output[config.job.output_field] = predict(dataset_iterable, model_client)
output["examples"] = dataset["examples"]
output["ground_truth"] = dataset["ground_truth"]
output["model"] = output_model_name

output_path = save_outputs(config, output)
Expand Down
55 changes: 55 additions & 0 deletions lumigator/python/mzai/sample_data/dialogsum_mini_no_gt.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
examples
"#Person1#: Hello, how are you doing today?
#Person2#: I ' Ve been having trouble breathing lately.
#Person1#: Have you had any type of cold lately?
#Person2#: No, I haven ' t had a cold. I just have a heavy feeling in my chest when I try to breathe.
#Person1#: Do you have any allergies that you know of?
#Person2#: No, I don ' t have any allergies that I know of.
#Person1#: Does this happen all the time or mostly when you are active?
#Person2#: It happens a lot when I work out.
#Person1#: I am going to send you to a pulmonary specialist who can run tests on you for asthma.
#Person2#: Thank you for your help, doctor."
"#Person1#: Hey Jimmy. Let's go workout later today.
#Person2#: Sure. What time do you want to go?
#Person1#: How about at 3:30?
#Person2#: That sounds good. Today we work on Legs and forearm.
#Person1#: Hey. I just played basketball earlier, so my legs are a little sore. Let's work out on arms and stomach today.
#Person2#: I'm on a weekly schedule. You're messing everything up.
#Person1#: C'mon. We're only switching two days. You can do legs on Friday.
#Person2#: Aright. I'll meet you at the gym at 3:30 then."
"#Person1#: I need to stop eating such unhealthy foods.
#Person2#: I know what you mean. I've started eating better myself.
#Person1#: What foods do you eat now?
#Person2#: I tend to stick to fruits, vegetables, and chicken.
#Person1#: Those are the only things you eat?
#Person2#: That's basically what I eat.
#Person1#: Why aren't you eating anything else?
#Person2#: Well, fruits and vegetables are very healthy.
#Person1#: And the chicken?
#Person2#: It's really healthy to eat when you bake it.
#Person1#: I guess that does sound a lot healthier."
"#Person1#: Do you believe in UFOs?
#Person2#: Of course, they are out there.
#Person1#: But I never saw them.
#Person2#: Are you stupid? They are called UFOs, so not everybody can see them.
#Person1#: You mean that you can them.
#Person2#: That's right. I can see them in my dreams.
#Person1#: They come to the earth?
#Person2#: No. Their task is to send the aliens here from the outer space.
#Person1#: Aliens from the outer space? Do you talk to them? What do they look like?
#Person2#: OK, OK, one by one, please! They look like robots, but they can speak. Their mission is to make friends with human beings.
#Person1#: That means that you talk to them? In which language?
#Person2#: Of course in English, they learn English on Mars too.
#Person1#: Wow. Sounds fantastic!"
"#Person1#: Did you go to school today?
#Person2#: Of course. Did you?
#Person1#: I didn't want to, so I didn't.
#Person2#: That's sad, but have you gone to the movies recently?
#Person1#: That's a switch.
#Person2#: I'm serious, have you?
#Person1#: No, I haven't. Why?
#Person2#: I really want to go to the movies this weekend.
#Person1#: So go then.
#Person2#: I really don't want to go by myself.
#Person1#: Well anyway, do you plan on going to school tomorrow?
#Person2#: No, I think I'm going to go to the movies."

0 comments on commit b894b29

Please sign in to comment.