Skip to content

Commit

Permalink
fix: 외부데이터 이름 형식 변경
Browse files Browse the repository at this point in the history
  • Loading branch information
yeseoLee committed Nov 23, 2024
1 parent 5d606e2 commit f60a955
Showing 1 changed file with 4 additions and 4 deletions.
8 changes: 4 additions & 4 deletions data_process/process_formatting.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
import pandas as pd


-def formatting(input_filename, output_filename):
+def formatting(suffix, input_filename, output_filename):
# CSV 파일 읽기
df = pd.read_csv(input_filename, encoding="utf-8")

# 새로운 형식으로 변환
new_records = []
for idx, row in df.iterrows():
new_record = {
-"id": f"external-data-{idx + 1}",
+"id": f"external-data-{suffix}{idx + 1}",
"paragraph": row["paragraph"],
"problems": {"question": row["question"].strip(), "choices": eval(row["choices"]), "answer": row["answer"]},
}
@@ -22,5 +22,5 @@ def formatting(input_filename, output_filename):

if __name__ == "__main__":
# formatting("external_raw.csv", "external.csv")
-formatting("sat_gaokao_ko_raw.csv", "sat_gaokao_ko.csv")
-formatting("MuSR_ko_raw.csv", "MuSR_ko.csv")
+formatting("SAT", "sat_gaokao_ko_raw.csv", "sat_gaokao_ko.csv")
+formatting("MuSR", "MuSR_ko_raw.csv", "MuSR_ko.csv")

0 comments on commit f60a955

Please sign in to comment.