diff --git a/Kasa/Preprocessing.py b/Kasa/Preprocessing.py index 0ecf1d7..317d505 100644 --- a/Kasa/Preprocessing.py +++ b/Kasa/Preprocessing.py @@ -15,24 +15,14 @@ def read_parallel_dataset(self,filepath_twi='../data/jw300.en-tw.tw', # read english data english_data = [] with open(filepath_english, encoding='utf-8') as file: - line = file.readline() - cnt = 1 - while line: + for line in file: english_data.append(line.strip()) - line = file.readline() - cnt += 1 # read twi data twi_data = [] with open(filepath_twi, encoding='utf-8') as file: - - # twi=file.read() - line = file.readline() - cnt = 1 - while line: + for line in file: twi_data.append(line.strip()) - line = file.readline() - cnt += 1 return twi_data,english_data