Infer #167 (Draft)
wants to merge 5 commits into base: main

Changes from all commits
34 changes: 34 additions & 0 deletions classification/test_model.py
@@ -0,0 +1,34 @@
import datetime

import torch
from tqdm import tqdm

from config import get_config
from model import TimmModel
from train import load_datasets, valid

tqdm.pandas()
time_now = datetime.datetime.now().strftime('%Y%m%d-%H%M')
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print("Device is: ",device)
CONFIG = get_config()

train_dataset, val_dataset, train_dataloader, val_dataloader = load_datasets(CONFIG)

print("Loading Model...")
model_for_run = TimmModel(num_classes=130,
model_name="convnextv2_nano",
checkpoint="/share/acoustic_species_id/models/convnextv2_nano-20230710-1731-0.pt",
CONFIG=CONFIG).to(device)

model_for_run.load_state_dict(torch.load("/share/acoustic_species_id/models/convnextv2_nano-20230710-1731-0.pt"))

valid(model_for_run, val_dataloader, 0, 1, CONFIG)
28 changes: 28 additions & 0 deletions make_csv.py
@@ -0,0 +1,28 @@
# %%
import math

import pandas as pd

df = pd.read_csv("/mnt/passive-acoustic-biodiversity/Peru_2019_Audiomoth_Sound_Recordings/2019_Peru_MDD_AudioMoth_Recordings_Metadata_Firmware_Timing_Error_Corrected_Faulty_Clips_Removed.csv")

# %%
df

# %%
df["CLIP LENGTH"] = df["Duration"]

# %%
def create_raw_chunks(group):
    """Expand one recording's metadata into consecutive 5-second chunks."""
    # groupby.apply hands this function the rows for a single SourceFile;
    # use the first row as the template for every chunk of that recording.
    row = group.iloc[0]
    rows = []
    for i in range(0, math.floor(row["CLIP LENGTH"]), 5):
        row_temp = row.copy(deep=True)
        row_temp["OFFSET"] = i
        row_temp["DURATION"] = 5
        rows.append(row_temp.to_frame().T)
    return pd.concat(rows)


chunked_df = df.groupby("SourceFile", as_index=False).apply(create_raw_chunks).reset_index()
chunked_df.to_csv("peru-2019-pyha-anaylzer-inferance.csv")
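
A quick sanity check of the chunking arithmetic (toy value; the real lengths come from the CLIP LENGTH column): a 12-second clip yields chunk offsets 0, 5, and 10, so the last 5 s chunk nominally runs past the end of the recording.

import math

clip_length = 12.0
offsets = list(range(0, math.floor(clip_length), 5))
print(offsets)  # [0, 5, 10] -- the chunk at 10 s extends to 15 s on a 12 s clip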


128 changes: 128 additions & 0 deletions old-config.yml
@@ -0,0 +1,128 @@
# Acoustic Multiclass Training config file

# Required path related config
dataframe_csv: "/share/acoustic_species_id/132PeruXC_TweetyNetLabels_baseline.csv"
data_path: "/share/acoustic_species_id/132_peru_xc_BC_2020/"

# Dataframe column names
offset_col: "OFFSET"
duration_col: "DURATION"
file_name_col: "IN FILE"
manual_id_col: "SPECIES"

# Device Settings
device: auto # Options: cuda, cpu, auto
prepros_device: cpu # Options: cuda, cpu

# Chunking settings
is_unchunked: true
does_center_chunking: false
chunk_length_s: 5
min_length_s: 0.4
include_last: false
overlap: 0.5

# Training params
train_batch_size: 32 # assumed; the committed value ("ls") is not a valid batch size
validation_batch_size: 32
jobs: 4
valid_freq: 1000
mixed_precision: true
valid_dataset_ratio: 0.3
does_weighted_sampling: false

# Logging
logging: true
logging_freq: 20 # Batches per log
wandb_entity: "acoustic-species-identification"
wandb_project: "acoustic-species-reu2023-sweeps"
wandb_run_name: "auto"
debug: false

# Functional settings
seed: 0
sample_rate: 32_000
map_debug: false
train_test_split: 0.8
num_fold: 5

# Model hyperparameters
# Specifies default model architecture from timm library
# Options:
# eca_nfnet_l0 (90 MB)
# tf_efficientnet_b4 (70 MB)
# convnext_nano (60 MB)
# convnext_tiny (110 MB)
# resnetv2_50 (100 MB)
# resnetv2_101 (170 MB)
# seresnext50_32x4d (100 MB)
# seresnext101_32x4d (200 MB)
# rexnet_200 (70 MB)
# mobilenetv3_large_100_miil_in21k (70 MB)
model: "eca_nfnet_l0"
epochs: 10
learning_rate: 0.001
# Loss function options:
# CE: Cross entropy
# BCE: Binary cross entropy
# BCEWL: Binary cross entropy with logits
loss_fnc: "CE"
imb: false

# Hyperparameter sweep settings
sweep_id:

# Specify path to load a checkpoint
model_checkpoint: ""

# Patience
early_stopping: false
patience: 3
min_valid_map_delta: 0.01

# Data augmentation probabilities
mixup_p: 0
time_shift_p: 0
noise_p: 0.23
freq_mask_p: 0.5
time_mask_p: 0.11
rand_eq_p: 0
lowpass_p: 0.05
highpass_p: 0.02
bg_noise_p: 0

# Data augmentation parameters
noise_type: "violet"
noise_alpha: 0.06
freq_mask_param: 10 # Number of frequencies masked
time_mask_param: 38 # Number of samples masked
mixup_alpha_range: [0.0, 0.6]
rand_eq_f_range: [20, 8000]
rand_eq_q_range: [0.3, 3]
rand_eq_g_range: [-2, 8]
rand_eq_iters: 2
lowpass_cutoff: 7000
lowpass_q_val: 0.44
highpass_cutoff: 1000
highpass_q_val: 0.44
bg_noise_alpha_range: [0.0, 0.4]

# Background noise params
# Expects background_path to lead to a directory with only audio files
# Example dataset: BirdVOX at https://dcase.community/challenge2018/task-bird-audio-detection
# Make sure to delete all bird clips so it is only background noise
# If path is blank and p=0, background noise will not be used
bg_noise_path: ""

# Transforms settings
imbalance_sampler: false
pos_weight: 1
smoothing: 0.05

# FFT Settings
hop_length: 512
n_mels: 194
n_fft: 1400
max_time: 5
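
The FFT block above pins down the spectrogram front end. As a rough illustration (an assumed mapping onto torchaudio; pyha_analyzer's actual preprocessing may differ), these settings correspond to:

import torch
import torchaudio

# Assumed translation of the config's FFT settings into torchaudio
mel = torchaudio.transforms.MelSpectrogram(
    sample_rate=32_000,  # sample_rate
    n_fft=1400,          # n_fft
    hop_length=512,      # hop_length
    n_mels=194,          # n_mels
)
clip = torch.randn(1, 32_000 * 5)  # max_time: 5 seconds of mono audio
spec = mel(clip)
print(spec.shape)  # torch.Size([1, 194, 313]): n_mels x frames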
2 changes: 1 addition & 1 deletion pyha_analyzer/augmentations.py
@@ -139,7 +139,7 @@ def forward(
chosen clip, Tensor of target mixed with the
target of the randomly chosen file
"""
-        if utils.rand(0,1) <= self.prob:
+        if utils.rand(0,1) >= self.prob:
return clip, target

num_other_clips = sample(self.num_clips_distribution)
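
This one-line change flips the early-return guard: assuming utils.rand(0,1) is a uniform draw from [0, 1), returning the clip untouched when the draw is at or above self.prob makes the mixup branch below run with probability self.prob, the usual meaning of an augmentation probability. A minimal sketch of the corrected gate (random.random stands in for utils.rand):

import random

def maybe_augment(clip, target, prob, augment):
    # Draw in [0, 1); values >= prob skip the augmentation,
    # so augment() fires with probability prob.
    if random.random() >= prob:
        return clip, target
    return augment(clip, target)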
84 changes: 0 additions & 84 deletions pyha_analyzer/chunking_methods/audio_splitter.py
@@ -1,84 +0,0 @@
""" Splits longer audio files into smaller ones """

import os
import pandas as pd
import torch
import torchaudio
from tqdm import tqdm

CONFIG = {
"metadata_csv": "annotations_chunked.csv",
"metadata_output": "annotations_split.csv",

"audio_dir": "input",
"sample_rate": "error", # Only use if input format is pt
"output_dir": "output",
"output_format": "flac", # Supports torch audio formats

"chunk_length_s": 60 * 5, # Length of each clip in seconds
"overlap_s": 10, # Overlap to add to each file in seconds

"file_name_col": "FILE NAME",
"offset_col": "OFFSET",

}

def output_file_name(path: str, index: int, file_format: str) -> str:
""" Returns the output file name for a given input file name and index """
return os.path.basename(path).split('.')[0] + "_" + str(index) + "." + file_format

def split_audio_file(path: str):
""" Splits audio file into smaller chunks """
split_len = CONFIG["chunk_length_s"]

# Load audio file
if path.endswith(".pt"):
audio = torch.load(path)
sample_rate = CONFIG["sample_rate"]
else:
audio, sample_rate = torchaudio.load(path) # type: ignore
audio = audio[0]

file_len = len(audio)/float(sample_rate)
num_splits = int(file_len / split_len)

for i in range(num_splits):
# Create slice
aud_slice = audio[i*split_len*sample_rate:((i+1)*split_len+CONFIG["overlap_s"])*sample_rate]
torchaudio.save(os.path.join(CONFIG["output_dir"], # type: ignore
output_file_name(path,i,CONFIG["output_format"])),
torch.unsqueeze(aud_slice,0), sample_rate)

def edit_row(row: pd.Series) -> pd.Series:
""" Edits a row of the metadata csv to reflect the new audio files
Changes file name and offset
"""
offset = row[CONFIG["offset_col"]]
file_index = int(offset/CONFIG["chunk_length_s"])
# Update file name
row[CONFIG["file_name_col"]] = \
output_file_name(str(row[CONFIG["file_name_col"]]), file_index, CONFIG["output_format"])
# Shift offset
row[CONFIG["offset_col"]] -= file_index * CONFIG["chunk_length_s"]
return row

def edit_metadata(df: pd.DataFrame):
""" Edits metadata to reflect the new audio files """
return df.apply(edit_row, axis=1)

def split_all(input_dir: str):
""" Splits all audio files in the input directory """
input_dir = os.path.abspath(input_dir)
for path in tqdm(os.listdir(input_dir)):
audio_path = os.path.join(input_dir, path)
split_audio_file(audio_path)

def main():
""" Main function """
df = pd.read_csv(CONFIG["metadata_csv"], index_col=0)
split_all(CONFIG["audio_dir"])
df = edit_metadata(df)
df.to_csv(CONFIG["metadata_output"])

if __name__ == "__main__":
main()
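
For intuition on the offset arithmetic in edit_row, a worked example with the defaults above (chunk_length_s = 300; the annotation offset is made up):

chunk_length_s = 300        # 60 * 5, as in CONFIG
offset = 650.0              # annotation 650 s into the original recording
file_index = int(offset / chunk_length_s)          # 2 -> file "<name>_2.flac"
new_offset = offset - file_index * chunk_length_s  # 50.0 s into that chunk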
23 changes: 0 additions & 23 deletions pyha_analyzer/chunking_methods/chunks_config.py
@@ -1,23 +0,0 @@

""" Stores default argument information for the argparser
Methods:
get_config: returns an ArgumentParser with the default arguments
"""
import argparse


def get_config():
""" Returns a config variable with the command line arguments or defaults
"""
parser = argparse.ArgumentParser()

parser.add_argument('-l', '--chunk_length_s', default=5, type=int, help='duration')
parser.add_argument('-f', '--filetype', default='.wav', type=str)
parser.add_argument('-w', '--sliding_window', action='store_true')

parser.add_argument('-a', '--audio_path', default='~/path/to/data/', type=str)
parser.add_argument('-m', '--metadata', default='~/metadata.csv', type=str)
parser.add_argument('-s', '--strong_labels', default='~/strong_labels.csv', type=str)
parser.add_argument('-c', '--chunk_labels', default='~/chunks.csv', type=str)

return parser.parse_args()
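
As a usage sketch (chunk_script.py is a hypothetical entry point that calls get_config; the flags are the ones defined above):

python chunk_script.py -l 5 -f .wav --sliding_window \
    -a ~/path/to/data/ -m ~/metadata.csv -s ~/strong_labels.csv -c ~/chunks.csv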
46 changes: 0 additions & 46 deletions pyha_analyzer/chunking_methods/combine_annotations.py
@@ -1,46 +0,0 @@
""" Combines short frequent annotations into a longer call annotation """

import argparse

import pandas as pd
from tqdm import tqdm

def combine_annotations(df: pd.DataFrame, max_gap_s: float = 0.5) -> pd.DataFrame:
""" Combine any annotations that have a gap length less than max_gap seconds"""
groups = df.groupby("FILE NAME")
out_groups = []
for _, group in tqdm(groups):
group.reset_index(drop=True, inplace=True)
df = group.sort_values(by=["OFFSET"])
i = 0
off_col = df.columns.get_loc("OFFSET")
dur_col = df.columns.get_loc("DURATION")
# Can't use a for loop because combining annotations changes the length of the dataframe :(
while i < len(df.index) - 1:
gap_length = df.iloc[i+1,off_col] - df.iloc[i,off_col] - df.iloc[i,dur_col]
if gap_length < max_gap_s:
# Combine the two annotations by increasing the firsts duration
# and deleting the second
df.iloc[i, dur_col] += gap_length + df.iloc[i+1,dur_col]
df.drop(df.iloc[i+1].name, inplace=True)
else:
i += 1

df.reset_index(drop=True, inplace=True)
out_groups.append(df)
return pd.concat(out_groups, ignore_index=True, sort=False)

if __name__=="__main__":
argparser = argparse.ArgumentParser()
argparser.add_argument('-i', '--input', type=str)
argparser.add_argument('-o', '--output', type=str)
argparser.add_argument('-g', '--max_gap', type=float, default=0.3)
args = argparser.parse_args()
assert args.input is not None, "Input file not specified"
assert args.output is not None, "Output file not specified"

dataframe = pd.read_csv(args.input, index_col=0)
combined = combine_annotations(dataframe, args.max_gap)
combined.to_csv(args.output)
print("Old annotation count:",len(dataframe.index))
print("New annotation count:",len(combined.index))