Infer #167 (Draft)
wants to merge 5 commits into base: main

Changes from all commits
34 changes: 34 additions & 0 deletions classification/test_model.py
@@ -0,0 +1,34 @@
import datetime

import torch
from tqdm import tqdm

from config import get_config
from model import TimmModel
from train import load_datasets, valid

tqdm.pandas()
time_now = datetime.datetime.now().strftime('%Y%m%d-%H%M')
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print("Device is: ",device)
CONFIG = get_config()

train_dataset, val_dataset, train_dataloader, val_dataloader = load_datasets(CONFIG)

print("Loading Model...")
model_for_run = TimmModel(num_classes=130,
model_name="convnextv2_nano",
checkpoint="/share/acoustic_species_id/models/convnextv2_nano-20230710-1731-0.pt",
CONFIG=CONFIG).to(device)

model_for_run.load_state_dict(torch.load("/share/acoustic_species_id/models/convnextv2_nano-20230710-1731-0.pt"))

valid(model_for_run, val_dataloader, 0, 1, CONFIG)
28 changes: 28 additions & 0 deletions make_csv.py
@@ -0,0 +1,28 @@
# %%
import math

import pandas as pd

df = pd.read_csv("/mnt/passive-acoustic-biodiversity/Peru_2019_Audiomoth_Sound_Recordings/2019_Peru_MDD_AudioMoth_Recordings_Metadata_Firmware_Timing_Error_Corrected_Faulty_Clips_Removed.csv")

# %%
df

# %%
df["CLIP LENGTH"] = df["Duration"]

# %%
def create_raw_chunks(group):
    """Expand one recording's metadata into consecutive 5-second chunks."""
    # groupby.apply hands this function the rows for a single SourceFile;
    # use the first row as the template for every chunk of that recording.
    row = group.iloc[0]
    rows = []
    for i in range(0, math.floor(row["CLIP LENGTH"]), 5):
        row_temp = row.copy(deep=True)
        row_temp["OFFSET"] = i
        row_temp["DURATION"] = 5
        rows.append(row_temp.to_frame().T)
    return pd.concat(rows)


chunked_df = df.groupby("SourceFile", as_index=False).apply(create_raw_chunks).reset_index()
chunked_df.to_csv("peru-2019-pyha-anaylzer-inferance.csv")
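
A quick sanity check of the chunking arithmetic (toy value; the real lengths come from the CLIP LENGTH column): a 12-second clip yields chunk offsets 0, 5, and 10, so the last 5 s chunk nominally runs past the end of the recording.

import math

clip_length = 12.0
offsets = list(range(0, math.floor(clip_length), 5))
print(offsets)  # [0, 5, 10] -- the chunk at 10 s extends to 15 s on a 12 s clip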


128 changes: 128 additions & 0 deletions old-config.yml
@@ -0,0 +1,128 @@
# Acoustic Multiclass Training config file

# Required path related config
dataframe_csv: "/share/acoustic_species_id/132PeruXC_TweetyNetLabels_baseline.csv"
data_path: "/share/acoustic_species_id/132_peru_xc_BC_2020/"

# Dataframe column names
offset_col: "OFFSET"
duration_col: "DURATION"
file_name_col: "IN FILE"
manual_id_col: "SPECIES"

# Device Settings
device: auto # Options: cuda, cpu, auto
prepros_device: cpu # Options: cuda, cpu

# Chunking settings
is_unchunked: true
does_center_chunking: false
chunk_length_s: 5
min_length_s: 0.4
include_last: false
overlap: 0.5

# Training params
train_batch_size: 32 # assumed; the committed value ("ls") is not a valid batch size
validation_batch_size: 32
jobs: 4
valid_freq: 1000
mixed_precision: true
valid_dataset_ratio: 0.3
does_weighted_sampling: false

# Logging
logging: true
logging_freq: 20 # Batches per log
wandb_entity: "acoustic-species-identification"
wandb_project: "acoustic-species-reu2023-sweeps"
wandb_run_name: "auto"
debug: false

# Functional settings
seed: 0
sample_rate: 32_000
map_debug: false
train_test_split: 0.8
num_fold: 5

# Model hyperparameters
# Specifies default model architecture from timm library
# Options:
# eca_nfnet_l0 (90 MB)
# tf_efficientnet_b4 (70 MB)
# convnext_nano (60 MB)
# convnext_tiny (110 MB)
# resnetv2_50 (100 MB)
# resnetv2_101 (170 MB)
# seresnext50_32x4d (100 MB)
# seresnext101_32x4d (200 MB)
# rexnet_200 (70 MB)
# mobilenetv3_large_100_miil_in21k (70 MB)
model: "eca_nfnet_l0"
epochs: 10
learning_rate: 0.001
# Loss function options:
# CE: Cross entropy
# BCE: Binary cross entropy
# BCEWL: Binary cross entropy with logits
loss_fnc: "CE"
imb: false

# Hyperparameter sweep settings
sweep_id:

# Specify path to load a checkpoint
model_checkpoint: ""

# Patience
early_stopping: false
patience: 3
min_valid_map_delta: 0.01

# Data augmentation probabilities
mixup_p: 0
time_shift_p: 0
noise_p: 0.23
freq_mask_p: 0.5
time_mask_p: 0.11
rand_eq_p: 0
lowpass_p: 0.05
highpass_p: 0.02
bg_noise_p: 0

# Data augmentation parameters
noise_type: "violet"
noise_alpha: 0.06
freq_mask_param: 10 # Number of frequencies masked
time_mask_param: 38 # Number of samples masked
mixup_alpha_range: [0.0, 0.6]
rand_eq_f_range: [20, 8000]
rand_eq_q_range: [0.3, 3]
rand_eq_g_range: [-2, 8]
rand_eq_iters: 2
lowpass_cutoff: 7000
lowpass_q_val: 0.44
highpass_cutoff: 1000
highpass_q_val: 0.44
bg_noise_alpha_range: [0.0, 0.4]

# Background noise params
# Expects background_path to lead to a directory with only audio files
# Example dataset: BirdVOX at https://dcase.community/challenge2018/task-bird-audio-detection
# Make sure to delete all bird clips so it is only background noise
# If path is blank and p=0, background noise will not be used
bg_noise_path: ""

# Transforms settings
imbalance_sampler: false
pos_weight: 1
smoothing: 0.05

# FFT Settings
hop_length: 512
n_mels: 194
n_fft: 1400
max_time: 5
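
The FFT block above pins down the spectrogram front end. As a rough illustration (an assumed mapping onto torchaudio; pyha_analyzer's actual preprocessing may differ), these settings correspond to:

import torch
import torchaudio

# Assumed translation of the config's FFT settings into torchaudio
mel = torchaudio.transforms.MelSpectrogram(
    sample_rate=32_000,  # sample_rate
    n_fft=1400,          # n_fft
    hop_length=512,      # hop_length
    n_mels=194,          # n_mels
)
clip = torch.randn(1, 32_000 * 5)  # max_time: 5 seconds of mono audio
spec = mel(clip)
print(spec.shape)  # torch.Size([1, 194, 313]): n_mels x frames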
2 changes: 1 addition & 1 deletion pyha_analyzer/augmentations.py
@@ -139,7 +139,7 @@ def forward(
chosen clip, Tensor of target mixed with the
target of the randomly chosen file
"""
-        if utils.rand(0,1) <= self.prob:
+        if utils.rand(0,1) >= self.prob:
return clip, target

num_other_clips = sample(self.num_clips_distribution)
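
This one-line change flips the early-return guard: assuming utils.rand(0,1) is a uniform draw from [0, 1), returning the clip untouched when the draw is at or above self.prob makes the mixup branch below run with probability self.prob, the usual meaning of an augmentation probability. A minimal sketch of the corrected gate (random.random stands in for utils.rand):

import random

def maybe_augment(clip, target, prob, augment):
    # Draw in [0, 1); values >= prob skip the augmentation,
    # so augment() fires with probability prob.
    if random.random() >= prob:
        return clip, target
    return augment(clip, target)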
84 changes: 0 additions & 84 deletions pyha_analyzer/chunking_methods/audio_splitter.py
@@ -1,84 +0,0 @@
""" Splits longer audio files into smaller ones """

import os
import pandas as pd
import torch
import torchaudio
from tqdm import tqdm

CONFIG = {
"metadata_csv": "annotations_chunked.csv",
"metadata_output": "annotations_split.csv",

"audio_dir": "input",
"sample_rate": "error", # Only use if input format is pt
"output_dir": "output",
"output_format": "flac", # Supports torch audio formats

"chunk_length_s": 60 * 5, # Length of each clip in seconds
"overlap_s": 10, # Overlap to add to each file in seconds

"file_name_col": "FILE NAME",
"offset_col": "OFFSET",

}

def output_file_name(path: str, index: int, file_format: str) -> str:
""" Returns the output file name for a given input file name and index """
return os.path.basename(path).split('.')[0] + "_" + str(index) + "." + file_format

def split_audio_file(path: str):
""" Splits audio file into smaller chunks """
split_len = CONFIG["chunk_length_s"]

# Load audio file
if path.endswith(".pt"):
audio = torch.load(path)
sample_rate = CONFIG["sample_rate"]
else:
audio, sample_rate = torchaudio.load(path) # type: ignore
audio = audio[0]

file_len = len(audio)/float(sample_rate)
num_splits = int(file_len / split_len)

for i in range(num_splits):
# Create slice
aud_slice = audio[i*split_len*sample_rate:((i+1)*split_len+CONFIG["overlap_s"])*sample_rate]
torchaudio.save(os.path.join(CONFIG["output_dir"], # type: ignore
output_file_name(path,i,CONFIG["output_format"])),
torch.unsqueeze(aud_slice,0), sample_rate)

def edit_row(row: pd.Series) -> pd.Series:
""" Edits a row of the metadata csv to reflect the new audio files
Changes file name and offset
"""
offset = row[CONFIG["offset_col"]]
file_index = int(offset/CONFIG["chunk_length_s"])
# Update file name
row[CONFIG["file_name_col"]] = \
output_file_name(str(row[CONFIG["file_name_col"]]), file_index, CONFIG["output_format"])
# Shift offset
row[CONFIG["offset_col"]] -= file_index * CONFIG["chunk_length_s"]
return row

def edit_metadata(df: pd.DataFrame):
""" Edits metadata to reflect the new audio files """
return df.apply(edit_row, axis=1)

def split_all(input_dir: str):
""" Splits all audio files in the input directory """
input_dir = os.path.abspath(input_dir)
for path in tqdm(os.listdir(input_dir)):
audio_path = os.path.join(input_dir, path)
split_audio_file(audio_path)

def main():
""" Main function """
df = pd.read_csv(CONFIG["metadata_csv"], index_col=0)
split_all(CONFIG["audio_dir"])
df = edit_metadata(df)
df.to_csv(CONFIG["metadata_output"])

if __name__ == "__main__":
main()
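
For intuition on the offset arithmetic in edit_row, a worked example with the defaults above (chunk_length_s = 300; the annotation offset is made up):

chunk_length_s = 300        # 60 * 5, as in CONFIG
offset = 650.0              # annotation 650 s into the original recording
file_index = int(offset / chunk_length_s)          # 2 -> file "<name>_2.flac"
new_offset = offset - file_index * chunk_length_s  # 50.0 s into that chunk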
23 changes: 0 additions & 23 deletions pyha_analyzer/chunking_methods/chunks_config.py
@@ -1,23 +0,0 @@

""" Stores default argument information for the argparser
Methods:
get_config: returns an ArgumentParser with the default arguments
"""
import argparse


def get_config():
""" Returns a config variable with the command line arguments or defaults
"""
parser = argparse.ArgumentParser()

parser.add_argument('-l', '--chunk_length_s', default=5, type=int, help='duration')
parser.add_argument('-f', '--filetype', default='.wav', type=str)
parser.add_argument('-w', '--sliding_window', action='store_true')

parser.add_argument('-a', '--audio_path', default='~/path/to/data/', type=str)
parser.add_argument('-m', '--metadata', default='~/metadata.csv', type=str)
parser.add_argument('-s', '--strong_labels', default='~/strong_labels.csv', type=str)
parser.add_argument('-c', '--chunk_labels', default='~/chunks.csv', type=str)

return parser.parse_args()
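
As a usage sketch (chunk_script.py is a hypothetical entry point that calls get_config; the flags are the ones defined above):

python chunk_script.py -l 5 -f .wav --sliding_window \
    -a ~/path/to/data/ -m ~/metadata.csv -s ~/strong_labels.csv -c ~/chunks.csv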
46 changes: 0 additions & 46 deletions pyha_analyzer/chunking_methods/combine_annotations.py
@@ -1,46 +0,0 @@
""" Combines short frequent annotations into a longer call annotation """

import argparse

import pandas as pd
from tqdm import tqdm

def combine_annotations(df: pd.DataFrame, max_gap_s: float = 0.5) -> pd.DataFrame:
""" Combine any annotations that have a gap length less than max_gap seconds"""
groups = df.groupby("FILE NAME")
out_groups = []
for _, group in tqdm(groups):
group.reset_index(drop=True, inplace=True)
df = group.sort_values(by=["OFFSET"])
i = 0
off_col = df.columns.get_loc("OFFSET")
dur_col = df.columns.get_loc("DURATION")
# Can't use a for loop because combining annotations changes the length of the dataframe :(
while i < len(df.index) - 1:
gap_length = df.iloc[i+1,off_col] - df.iloc[i,off_col] - df.iloc[i,dur_col]
if gap_length < max_gap_s:
# Combine the two annotations by increasing the firsts duration
# and deleting the second
df.iloc[i, dur_col] += gap_length + df.iloc[i+1,dur_col]
df.drop(df.iloc[i+1].name, inplace=True)
else:
i += 1

df.reset_index(drop=True, inplace=True)
out_groups.append(df)
return pd.concat(out_groups, ignore_index=True, sort=False)

if __name__=="__main__":
argparser = argparse.ArgumentParser()
argparser.add_argument('-i', '--input', type=str)
argparser.add_argument('-o', '--output', type=str)
argparser.add_argument('-g', '--max_gap', type=float, default=0.3)
args = argparser.parse_args()
assert args.input is not None, "Input file not specified"
assert args.output is not None, "Output file not specified"

dataframe = pd.read_csv(args.input, index_col=0)
combined = combine_annotations(dataframe, args.max_gap)
combined.to_csv(args.output)
print("Old annotation count:",len(dataframe.index))
print("New annotation count:",len(combined.index))