Skip to content

Commit

Permalink
fix: remove empty files.
Browse files Browse the repository at this point in the history
  • Loading branch information
AJDERS committed Nov 29, 2023
1 parent 8940991 commit 552c601
Showing 1 changed file with 10 additions and 0 deletions.
10 changes: 10 additions & 0 deletions src/coral_models/prepare_raw_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -344,9 +344,16 @@ def prepare_raw_data(
# audio.
read_aloud_duration = 0.0
conversation_duration = 0.0
rows_to_remove = []
for row_i, row in tqdm(recordings.iterrows()):
filename = input_path / row["filename"]

# Check if the file is treated as empty (below the size threshold); if it is,
# mark its row for removal from the dataframe and continue to the next file
if filename.stat().st_size < 200000: # Any file smaller than this is empty
rows_to_remove.append(row_i)
continue

# Get the new filename
# The new filename format for conversations is:
# "recording_id_speaker_id1_speaker_id2_recorder_speaker_id_conversation.wav"
Expand Down Expand Up @@ -400,6 +407,9 @@ def prepare_raw_data(
except FileNotFoundError:
pass

# Remove rows with empty files
recordings = recordings.drop(rows_to_remove).reset_index(drop=True)

# Write a README file
readme = make_readme()
with open(output_path / "README.md", "w") as f:
Expand Down

0 comments on commit 552c601

Please sign in to comment.