From f60a955b6ad2698d75f550790ccf131eea41e502 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EC=9D=B4=EC=98=88=EC=84=9C?= <49704047+yeseoLee@users.noreply.github.com> Date: Sun, 24 Nov 2024 00:39:05 +0900 Subject: [PATCH] =?UTF-8?q?fix:=20=EC=99=B8=EB=B6=80=EB=8D=B0=EC=9D=B4?= =?UTF-8?q?=ED=84=B0=20=EC=9D=B4=EB=A6=84=20=ED=98=95=EC=8B=9D=20=EB=B3=80?= =?UTF-8?q?=EA=B2=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- data_process/process_formatting.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/data_process/process_formatting.py b/data_process/process_formatting.py index 0d7e521..85680a6 100644 --- a/data_process/process_formatting.py +++ b/data_process/process_formatting.py @@ -1,7 +1,7 @@ import pandas as pd -def formatting(input_filename, output_filename): +def formatting(suffix, input_filename, output_filename): # CSV 파일 읽기 df = pd.read_csv(input_filename, encoding="utf-8") @@ -9,7 +9,7 @@ def formatting(input_filename, output_filename): new_records = [] for idx, row in df.iterrows(): new_record = { - "id": f"external-data-{idx + 1}", + "id": f"external-data-{suffix}{idx + 1}", "paragraph": row["paragraph"], "problems": {"question": row["question"].strip(), "choices": eval(row["choices"]), "answer": row["answer"]}, } @@ -22,5 +22,5 @@ def formatting(input_filename, output_filename): if __name__ == "__main__": # formatting("external_raw.csv", "external.csv") - formatting("sat_gaokao_ko_raw.csv", "sat_gaokao_ko.csv") - formatting("MuSR_ko_raw.csv", "MuSR_ko.csv") + formatting("SAT", "sat_gaokao_ko_raw.csv", "sat_gaokao_ko.csv") + formatting("MuSR", "MuSR_ko_raw.csv", "MuSR_ko.csv")