-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy path2_RRPCA_HMDwithLFAS_allData.R
106 lines (85 loc) · 3.56 KB
/
2_RRPCA_HMDwithLFAS_allData.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
# De-noising methods HMD files
# reads combined 1-min HMD data with AS labels and runs for each site
# output from 1A
rm(list=ls())
library(data.table)
library(ggplot2)
library(lubridate)
library(dplyr)
library(viridis)
library(tidyverse)
library(rsvd)
# PARAMS
fqr = "LF"
DC = Sys.Date()
#GET DATA ####
gdrive = "G:\\.shortcut-targets-by-id\\1QAlmQwj6IS-J6Gw2PRNQR6jz_4qA5CYZ\\"
dirIn = paste0( gdrive, "SoundCoop_AcousticScene\\CombineData\\A_outputHMDDETS\\" )
inFiles = list.files( dirIn, pattern = "_HmdDetsAS", recursive = T, full.names = T )
inFiles = inFiles[!grepl("csv", inFiles)]
dirOut = inDir
RRPCAsumOUT = NULL # summary of percentiles for each site
# LOOP through sites ####
for (f in 1: length(inFiles)) { # f = 1
load( inFiles[f])
AS = read.csv(paste0(dirIn,"\\", siteN, "_HmdDetsAS.csv"))
AS = mutate(AS, season = ifelse(season == "notFilled", "no label", season))
AS$season <- factor(AS$season, ordered = TRUE, levels = c("form", "ice", "break", "open","no label"))
load( inFiles[f])
st = sapply(strsplit(basename( inFiles[f]), "_"), "[[", 2) #site name
HMDdet$Site = st
idNA = ( which(is.na(HMDdet))) # check for NAs, as.data.frame( colnames( Ambient )[1:10] )
idx = grep("^X", colnames(HMDdet))
hix = as.numeric( gsub("X","", names(HMDdet)[idx]) )
Nv = HMDdet[, idx] #dB values
NvP = 10^(Nv/20) #pressure values
nvDate = HMDdet$dateTime
## truncate to 100-1kHz ####
fe = which(hix == 1001.2)
NvPt = NvP[,1:fe]
Nv = Nv[ ,1:fe]
hix = hix[1:fe]
## RRPCA ####
# Robust principal components analysis separates a matrix into a low-rank plus sparse component
#a method for the robust separation of a rectangular (m, n) matrix A into a low-rank component L and a sparse component S
# input = ( NvP )
lamd = max(NvPt)^-0.5 #default settings
nvpcaTOL = rrpca(NvPt)
sampleHours = nrow(NvP)
save(nvpcaTOL, file = paste0(dirOut, "\\RRPCA_HMD_allSites_", st, "_", DC, ".Rda") )
## (option to load rrpca results here) ####
## RRPCA results ####
#low rank
Lr = as.data.frame(nvpcaTOL$L)
colnames(Lr) = hix
LrDB = 10*log10( Lr^2 ) #CHECK: min(LrDB$`63`), no negative values, just values without transients
colnames(LrDB) = hix
#sparse matrix
Sp = as.data.frame(nvpcaTOL$S)
colnames(Sp) = hix
SpDB = 10*log10( (Sp)^2 ) # negative and zero values-- does not make sense to convert back to dB
colnames(SpDB) = hix
## RRPCA thresholds ####
# sum of difference across frequencies for each minute
LRdiff = as.data.frame ( rowSums( abs ( (LrDB - Nv) ) ) )
colnames(LRdiff) = 'LRdiff'
LRfq = as.data.frame ( as.numeric ( colnames(LrDB) [apply(LrDB, 1, (which.max) )] ) )
colnames(LRfq) = 'LRfq'
# sum of sparce across frequencies for each minute
SPsum = as.data.frame ( rowSums( abs ( Sp ) ) )
colnames(SPsum) = 'SPsum'
## label files ####
HMDdet$LowRanK = as.numeric( as.character(LRdiff$LRdiff ) )
HMDdet$Sparce = as.numeric( as.character(SPsum$SPsum ) )
HMDdet$LRfq = LRfq$LRfq
## percentile for thresholds ####
RRPCAsum = as.data.frame ( rbind ( quantile(HMDdet$LowRanK, na.rm = T),
quantile(HMDdet$LRfq, na.rm = T),
quantile(HMDdet$Sparce, na.rm = T) ) )
RRPCAsum$Site = st
RRPCAsum$RRPCAmetric = c("LR-sum","LR-freq","SP-sum")
RRPCAsumOUT = rbind(RRPCAsumOUT, RRPCAsum )
## write out new HMD+ ####
save(HMDdet, file = paste0(dirOut, "\\HMDdets_RpcaSite_", st, "_", DC, ".Rda") )
} # end site loop
save(RRPCAsum, file = paste0(dirOut, "\\RRPCAsum_bySite", "_", DC, ".Rda") )