Skip to content

Commit

Permalink
Add basic functions for testing
Browse files Browse the repository at this point in the history
  • Loading branch information
BrittanyTroast-NOAA committed Feb 10, 2025
1 parent 88bc555 commit 813bfb0
Show file tree
Hide file tree
Showing 2 changed files with 262 additions and 0 deletions.
193 changes: 193 additions & 0 deletions R/data_prep.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,193 @@
#' Title
#' @description
#' This function reformats data from the "plotIndicatorTimeSeries" format, into a data object that works with plotting functions in the IEAnalyzeR function.
#'
#'
#' @param df Dataset with top 3 rows inlcuding metadata of indicator name, unit, and subcategory.
#' @param trends T/F if you would like the function to calculate trends on this dataset.
#'
#' @return An object with 5 datasets used in "plot_fn_obj".
#' @export
#'
#' @examples
data_prep <-function (df, trends=T) {

df_list<-vector("list", 5)
names(df_list)<-c("data", "pos", "neg", "labs", "vals")

#Data used for everything
df_dat<-df[4:nrow(df),c(1:ncol(df))]

if (ncol(df_dat)<2.5) {
colnames(df_dat)<-c("year","value")
df_dat$value<- as.numeric(df_dat$value)

mean<-mean(as.numeric(df_dat$value), na.rm = T)
sd<-sd(as.numeric(df_dat$value), na.rm = T)

df_dat$valence[df_dat$value>=mean]<-"pos"
df_dat$valence[df_dat$value< mean]<-"neg"
df_dat$min <- ifelse(df_dat$value >= mean, mean, df_dat$value)
df_dat$max <- ifelse(df_dat$value >= mean, df_dat$value, mean)
df_dat$year <- as.numeric(df_dat$year)
df_dat} else {

sub_list<-list()
for (i in 2:ncol(df_dat)){
sub_df<-df_dat[,c(1,i)]
df_lab<-df[1:3,] #For example sake cutting to only col I need
ind<-df_lab[3,]
colnames(sub_df)<-c("year","value")
# sub_df$value<- as.numeric(sub_df$value)
sub_df<-as.data.frame(lapply(sub_df, as.numeric))

mean<-mean(as.numeric(sub_df$value), na.rm = T)
sd<-sd(as.numeric(sub_df$value), na.rm = T)

sub_df$valence[sub_df$value>=mean]<-"pos"
sub_df$valence[sub_df$value< mean]<-"neg"
sub_df$min <- ifelse(sub_df$value >= mean, mean, sub_df$value)
sub_df$max <- ifelse(sub_df$value >= mean, sub_df$value, mean)
sub_df$year <- as.numeric(sub_df$year)
sub_df$subnm<-ind[,i]
sub_list[[i]]<-sub_df

}
df_dat<-do.call("rbind",sub_list)
}
df_list$data<-df_dat


#Pos data set used for main plot

if(ncol(df_dat)<6){
mean<-mean(as.numeric(df_dat$value), na.rm = T)
sd<-sd(as.numeric(df_dat$value), na.rm = T)
pos<-df_dat
pos$value<-ifelse(pos$valence == "pos",pos$value, mean)
pos} else {
sub_list<-list()
subs<-unique(df_dat$subnm)
for (i in 1:length(subs)){
sub_df<-df_dat[df_dat$subnm==subs[i],]
mean<-mean(as.numeric(sub_df$value), na.rm = T)
sd<-sd(as.numeric(sub_df$value), na.rm = T)
pos<-sub_df
pos$value<-ifelse(pos$valence == "pos",pos$value, mean)
pos$subnm<-subs[i]
pos$mean<-mean
pos$sd<-sd
sub_list[[i]]<-pos
}
pos<-do.call("rbind",sub_list)
}
df_list$pos<-pos


#Neg data set used for main plot
if(ncol(df_dat)<6){
mean<-mean(as.numeric(df_dat$value), na.rm = T)
sd<-sd(as.numeric(df_dat$value), na.rm = T)
neg<-df_dat
neg$value<-ifelse(neg$valence == "neg",neg$value, mean)
neg} else {
sub_list<-list()
subs<-unique(df_dat$subnm)
for (i in 1:length(subs)){
sub_df<-df_dat[df_dat$subnm==subs[i],]
mean<-mean(as.numeric(sub_df$value), na.rm = T)
sd<-sd(as.numeric(sub_df$value), na.rm = T)
neg<-sub_df
neg$value<-ifelse(neg$valence == "neg",neg$value, mean)
neg$subnm<-subs[i]
neg$mean<-mean
neg$sd<-sd
sub_list[[i]]<-neg
}
neg<-do.call("rbind",sub_list)
}
df_list$neg<-neg

df_list$labs<-df[1:3, c(1:ncol(df))]

#Independent values used throughout
if (trends==T) {
if(ncol(df_dat)<6){
mean<-mean(as.numeric(df_dat$value), na.rm = T)
sd<-sd(as.numeric(df_dat$value), na.rm = T)

#Trend Analysis
last5<-df_dat[df_dat$year > max(df_dat$year)-5,]
#Mean Trend
last5_mean<-mean(last5$value) # mean value last 5 years
mean_tr<-if_else(last5_mean>mean+sd, "ptPlus", if_else(last5_mean<mean-sd, "ptMinus","ptSolid")) #qualify mean trend
mean_sym<-if_else(last5_mean>mean+sd, "+", if_else(last5_mean<mean-sd, "-","")) #qualify mean trend
mean_word<-if_else(last5_mean>mean+sd, "greater", if_else(last5_mean<mean-sd, "below","within")) #qualify mean trend

#Slope Trend
lmout<-summary(lm(last5$value~last5$year))
last5_slope<-coef(lmout)[2,1] * 5 #multiply by years in the trend (slope per year * number of years=rise over 5 years)
slope_tr<-if_else(last5_slope>sd, "arrowUp", if_else(last5_slope< c(-sd), "arrowDown","arrowRight"))
slope_sym<-if_else(last5_slope>sd, "", if_else(last5_slope< c(-sd), "",""))
slope_word<-if_else(last5_slope>sd, "an increasing", if_else(last5_slope< c(-sd), "a decreasing","a stable"))

#Dataframe
vals<-data.frame(mean=mean,
sd=sd,
mean_tr=mean_tr,
slope_tr=slope_tr,
mean_sym=mean_sym,
slope_sym=slope_sym,
mean_word=mean_word,
slope_word=slope_word)
vals} else {
sub_list<-list()
subs<-unique(df_dat$subnm)
for (i in 1:length(subs)){
sub_df<-df_dat[df_dat$subnm==subs[i],]
minyear<-min(na.omit(sub_df)$year)
maxyear<-max(na.omit(sub_df)$year)
allminyear<-min(df_dat$year)
allmaxyear<-max(df_dat$year)
mean<-mean(as.numeric(sub_df$value), na.rm = T)
sd<-sd(as.numeric(sub_df$value), na.rm = T)

#Trend Analysis
last5<-sub_df[sub_df$year > max(sub_df$year)-5,]
#Mean Trend
last5_mean<-mean(last5$value) # mean value last 5 years
mean_tr<-if_else(last5_mean>mean+sd, "ptPlus", if_else(last5_mean<mean-sd, "ptMinus","ptSolid")) #qualify mean trend
mean_sym<-if_else(last5_mean>mean+sd, "+", if_else(last5_mean<mean-sd, "-","")) #qualify mean trend
mean_word<-if_else(last5_mean>mean+sd, "greater", if_else(last5_mean<mean-sd, "below","within")) #qualify mean trend

#Slope Trend
lmout<-summary(lm(last5$value~last5$year))
last5_slope<-coef(lmout)[2,1] * 5 #multiply by years in the trend (slope per year * number of years=rise over 5 years)
slope_tr<-if_else(last5_slope>sd, "arrowUp", if_else(last5_slope< c(-sd), "arrowDown","arrowRight"))
slope_sym<-if_else(last5_slope>sd, "", if_else(last5_slope< c(-sd), "",""))
slope_word<-if_else(last5_slope>sd, "an increasing", if_else(last5_slope< c(-sd), "a decreasing","a stable"))

vals<-data.frame(allminyear=allminyear,
allmaxyear=allmaxyear,
minyear=minyear,
maxyear=maxyear,
mean=mean,
sd=sd,
mean_tr=mean_tr,
slope_tr=slope_tr,
mean_sym=mean_sym,
slope_sym=slope_sym,
mean_word=mean_word,
slope_word=slope_word,
subnm=subs[i])


sub_list[[i]]<-vals
}
vals<-do.call("rbind",sub_list)

}
df_list$vals<-vals
}
df_list
}
69 changes: 69 additions & 0 deletions R/plot_fn_obj.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
#' Title
#' @description
#' This function plots an indicator time series figure from data that is formated from the "data_prep" function in IEAnalyzeR.
#'
#'
#' @param df_obj Data object produced by the "data_prep" function.
#' @param interactive Run plot through plotly to create an interactive version of the plot.
#'
#' @return A plot in the indicatorTimeSeries format.
#' @export
#'
#' @examples
plot_fn_obj<-function(df_obj, interactive=FALSE) {


if (ncol(df_obj$data)<5.5){
#single plot
plot_main<-ggplot(data=df_obj$data, aes(x=year, y=value))+
geom_ribbon(data=df_obj$pos, aes(group=1,ymax=max, ymin=df_obj$vals$mean),fill="#7FFF7F")+
geom_ribbon(data=df_obj$neg, aes(group=1,ymax=df_obj$vals$mean, ymin=min), fill="#FF7F7F")+
geom_rect(aes(xmin=min(df_obj$data$year),xmax=max(df_obj$data$year),ymin=df_obj$vals$mean-df_obj$vals$sd, ymax=df_obj$vals$mean+df_obj$vals$sd), fill="white")+
geom_hline(yintercept=df_obj$vals$mean, lty="dashed")+
geom_hline(yintercept=df_obj$vals$mean+df_obj$vals$sd)+
geom_hline(yintercept=df_obj$vals$mean-df_obj$vals$sd)+
geom_line(aes(group=1), lwd=1)+
labs(x="Year", y=df_obj$labs[2,2], title = df_obj$labs[1,2])+
theme_bw() + theme(title = element_text(size=14, face = "bold"))

if (max(df_obj$data$year)-min(df_obj$data$year)>20) {
plot_main<-plot_main+scale_x_continuous(breaks = seq(min(df_obj$data$year),max(df_obj$data$year),5))
} else {
plot_main<-plot_main+scale_x_continuous(breaks = seq(min(df_obj$data$year),max(df_obj$data$year),2))
}

if (!interactive==F) {
plot_main=ggplotly(plot_main)
}
plot_main

} else {
#facet plot

plot_sec<-ggplot(data=df_obj$data, aes(x=year, y=value))+
facet_wrap(~subnm, ncol=ifelse(length(unique(df_obj$data$subnm))<4, 1, 2), scales = "free_y")+
geom_ribbon(data=df_obj$pos, aes(group=subnm,ymax=max, ymin=mean),fill="#7FFF7F")+
geom_ribbon(data=df_obj$neg, aes(group=subnm,ymax=mean, ymin=min), fill="#FF7F7F")+
geom_rect(data=merge(df_obj$data,df_obj$vals), aes(xmin=allminyear,xmax=allmaxyear,ymin=mean-sd, ymax=mean+sd), fill="white")+
geom_hline(aes(yintercept=mean), lty="dashed",data=df_obj$vals)+
geom_hline(aes(yintercept=mean+sd),data=df_obj$vals)+
geom_hline(aes(yintercept=mean-sd),data=df_obj$vals)+
geom_line(aes(group=1), lwd=0.75)+
labs(x="Year", y=df_obj$labs[2,2], title = df_obj$labs[1,2])+
theme_bw()+theme(strip.background = element_blank(),
strip.text = element_text(face="bold"),
title = element_text(size=14, face = "bold"))

if (max(df_obj$data$year)-min(df_obj$data$year)>20) {
plot_sec<-plot_sec+scale_x_continuous(breaks = seq(min(df_obj$data$year),max(df_obj$data$year),5))
} else {
plot_sec<-plot_sec+scale_x_continuous(breaks = seq(min(df_obj$data$year),max(df_obj$data$year),2))
}

if (!interactive==F) {
plot_sec=ggplotly(plot_sec)
}

plot_sec
}
}

0 comments on commit 813bfb0

Please sign in to comment.