From e5220dadb1329b7a53e264cf14ea8b8fe706b8ec Mon Sep 17 00:00:00 2001
From: Amanda Hart <144831749+amanda-hart@users.noreply.github.com>
Date: Fri, 22 Mar 2024 14:37:31 -0400
Subject: [PATCH] Add function to fill out stockEff CF template.

---
 stockEff/fillCF.R | 513 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 513 insertions(+)
 create mode 100644 stockEff/fillCF.R

diff --git a/stockEff/fillCF.R b/stockEff/fillCF.R
new file mode 100644
index 0000000..3e13712
--- /dev/null
+++ b/stockEff/fillCF.R
@@ -0,0 +1,513 @@
+#' @title Fill CF specification template for stockEff
+#' 
+#' @param species_itis 6 digit ITIS identifying the species for which data should be pulled, no default.
+#' @param stock_abbrev A string describing the stock abbreviation assigned by stock efficiency (this shows up in the web address for STOCKEFF products), no default.          
+#' @param assessment_abbrev A string describing the assessment abbreviation, no default. Common options include "MT", "RT", "OA"
+#' @param assessment_year The year in which the assessment will be reviewed, no default.
+#' @param data_years The range of years for which catch data should be formatted for stockEff CF excel template, no default.
+#' @param sex_type A string describing the sex type for which data should be pulled (this shows up in the web address for STOCKEFF products), default = "NONE". Common options include "NONE", "MALE", "FEMALE", "UNSEXED" 
+#' 
+#' @param tab_blocks A list providing the following information to populate the BLOCKS tab: (NOTE: LW_ID is populated by identifying the LW whose associated start and end months completely overlap with the block, otherwise an NA is returned)
+#' \itemize{
+#'   \item{nespp4 - A named vector of market categories to include in block tab, where names correspond to the nespp4 codes. No default}
+#'   \item{dropMarketYr - A table containing the nespp4 market category ("NESPP4" column) to "roll up" (i.e. drop) in the corrsponding year column("YEAR"), if provided landings are redistributed to other categories for that year/region (see "roll up" in Adding and Updating Stocks in StockEff documentation)}
+#'   \item{block_opt - A string for the following options to specify blocks for each market category and year: 
+#'           if = NULL (default), one block for every L-W relationship in tab_lw, 
+#'           if = "MONTH", one block for each month (useful for checking minimum number of lengths exceeded for larger blocks), 
+#'           if = "QUARTER", one block for each quarter,
+#'           if = "SEMESTER", one block for each semester,
+#'           if = "ANNUAL", one block per year for each market category
+#'           if = "checkLENGTHS", Assumes monthly blocks were submitted to stockEff, checks the number of monthly length samples from biosamp_summary.csv to determine smallest block supported by data for each market category and year. Must also provide minLenSample argument if this option is used.}
+#'   \item{minLenSample - A number specifying the minimum number of length sampels required to support a given block for a year and market category, only required if tab_blocks$block_opt == "checkLENGTHS"}
+#'   \item{autofillLW - A boolean, if TRUE fills LW_ID based on complete overlap with LW months and years, if FALSE leaves NAs for blocks without a perfect match to LW month and year definition, default = FALSE}
+#'   \item{mkt_missing - A string to govern years where 1+ market categories not length sampled, default = NULL:
+#'            if = "drop" then these yearse dropped and NAA is not estimated
+#'            if = NULL then return NAs for BLOCK_TYPE in these years}
+#'   \item{check_missing - A boolean, if TRUE check for missing market categories in data_years, also check if is.na(mkt_missing) == FALSE (i.e. if years with missing market categories will be dropped or further processed)}
+#' }
+#' @param tab_prorate A list providing the following information to populate the PRORATE tab:
+#' \itemize{
+#'   \item{turnOff - A vector of years for which unclassified landings should NOT be prorated, default prorates for all data_years.}
+#' }
+#' @param tab_regions A list providing the following information to populate the REGIONS tab: (NOTE: code is less well tested with >1 region)
+#' \itemize{
+#'   \item{stat_areas - A list of stat area vectors by region, no default. Example: list(c(510, 511), c(512, 513)) assumes 2 regions each containing 2 stat areas}
+#'   \item{region_custom - A optional custom matrix containing "REGION_ID", "AREA", and "YEAR" columns, not required if stat_areas object provided}
+#' }
+#' @param tab_lw A matrix or data.frame containing the following named columns of information for each LW data source. Currently only supports blocks with types, start and end months that match LW_PARAMS and are consistent across all years
+#' \itemize{
+#'   \item{ALPHA - Alpha parameter for each LW relationship}
+#'   \item{BETA - Beta parameter for each LW relationship}
+#'   \item{SOURCE - Description of LW parameter and link to source document}
+#'   \item{LW_TYPE - Type of LW relationship, options can include "SEMESTER", "ANNUAL", "CUSTOM"}
+#'   \item{LW_ID - ID fore each row of the table}
+#'   \item{MONTH_START - First month that LW_ID row may be used, processed in tab_BLOCKS and ultimately dropped from tab_LW_PARAMS}
+#'   \item{MONTH_END - Last month that LW_ID row may be used, processed in tab_BLOCKS and ultimately dropped from tab_LW_PARAMS}
+#'   \item{YEAR_START - First year that LW_ID row may be used, processed in tab_BLOCKS and ultimately dropped from tab_LW_PARAMS}
+#'   \item{YEAR_END - Last year that LW_ID row may be used, processed in tab_BLOCKS and ultimately dropped from tab_LW_PARAMS}
+#' }
+#' @param tab_sex A list providing the following information to populate the SEX_TYPE tab:
+#' \itemize{
+#'   \item{start_yr - The start year for CF data, default = first year in data_years.}
+#'   \item{end_yr - The end year for CF data, default = last year in data_years.}
+#' }
+#' @param outfile A string for the final exported file name, does NOT need to include .xlsx extention. No default.
+#' 
+#' NOTE: tab_blocks$block_opt = checkLENGTHS will NOT work unless connected to the VPN
+#' NOTE: ALK_HOLES, EXCLUSIONS, and LENGTH_IMPUTATIONS tabs are not populated by this function but placeholders are put in the resulting output file so that they can be filled externally as needed.
+#' NOTE: If you receive the following error, then the .xlsx file by the provided name already exists and can't be overwritten: [ERROR] workbook_close(): Error creating 'outfile.xlsx'. System error = Permission denied Error: Error in libxlsxwriter: 'Error creating output xlsx file. Usually a permissions error.'
+#' NOTE: If you find NAs in the REGION_ID for the BLOCKS tab, then there was a stat area in the stockeff_pre_prod.mv_cf_stock_data_length_o product that was not provided in the tab_regions$stat_areas argument.
+#'
+#' @return A list containing the following:
+#' \itemize{
+#'   \item{mkt_missing_yr - A table of market categories years for which 1+ market categories were not sampled where 1+ market categories were not sampled}
+#'   \item{regions_missing - Table of AREAs that appear in oracle data but not in tab_regions$stat_area argument for this function (may be missing blocking)}
+#' }
+
+library(tidyverse)
+library(DataExplorer)
+library(writexl)
+
+# GB cod example - this still uses the RT LW
+# fillLW <- read_csv("Data_workflow/RT_code_fromCP/lw_coefficients_cod_final.csv", show_col_types = FALSE) %>%
+#   filter(STOCK == "GB") %>%
+#   mutate(LW_TYPE = case_when(TB %in% c("QTR1", "QTR2", "QTR3", "QTR4") ~ "QUARTER",
+#                              TB %in% c("SEM1", "SEM2") ~ "SEMESTER",
+#                              TB == "ANN" ~ "ANNUAL"),
+#          MONTH_START = case_when(TB %in% c("QTR1", "SEM1", "ANN") ~ 1,
+#                                  TB == "QTR2" ~ 4,
+#                                  TB %in% c("QTR3", "SEM2") ~ 7,
+#                                  TB == "QTR4" ~ 10),
+#          MONTH_END = case_when(TB == "QTR1" ~ 3,
+#                                TB %in% c("QTR2", "SEM1") ~ 6,
+#                                TB == "QTR3" ~ 9,
+#                                TB %in% c("QTR4", "SEM2", "ANN") ~ 12),
+#          YEAR_START = 2019, YEAR_END = 2023,
+#          SOURCE = paste0("RT calculated LW: lw_coefficients_cod_final.csv ", TB, " ", YEAR_START, "-",YEAR_END)) %>%
+#   group_by(STOCK, TB, a, b) %>% mutate(LW_ID = cur_group_id())
+# 
+# fillCF(species_itis = 164712,
+#        stock_abbrev = "GBK",
+#        assessment_abbrev = "MT",
+#        assessment_year = 2024,
+#        data_years = 2019:2023, # I want an additional 3 years of data so I have overlap with the RT to check against
+#        sex_type = "NONE",
+#        tab_regions = list(stat_areas = list(c(464, 522, 523, 524, 525, 561, 562, 551, 552, 542, 543))), # From post-Hague line SAS code
+#        tab_lw = data.frame(ALPHA = fillLW$a,
+#                            BETA = fillLW$b,
+#                            SOURCE = fillLW$SOURCE,
+#                            LW_TYPE = fillLW$LW_TYPE,
+#                            LW_ID = fillLW$LW_ID,
+#                            MONTH_START = fillLW$MONTH_START,
+#                            MONTH_END = fillLW$MONTH_END,
+#                            YEAR_START = fillLW$YEAR_START,
+#                            YEAR_END = fillLW$YEAR_END),
+#        tab_blocks = list(nespp4 = c(LARGE = 811, Market = 813, Scrod = 814),# Available: c(Unknown = 0, Large = 811, Market = 813, Scrod = 814, Steaker = 816, "Undersized (MREM)" = 822) but only use 811, 813 and 814 (816 headless so no ages, 822 MREM undersized, 0 unknown)
+#                        minLenSample = 10,
+#                        block_opt = "checkLENGTHS",
+#                        autofillLW = TRUE,
+#                        mkt_missing = "drop"),
+#        outfile = here::here("Data_workflow", "GB_cod_CFfill_3-22-24"))
+
+#' # BSB SOUTH example
+#' # First grab LW info as processed for RT
+#' load(here::here("Data_workflow/BSBtest_Length_Weight_Data.RDATA")) # Load LW example for South BSB stock
+#' stock = "SOUTH"
+#' lm.obj <- get(paste("lm",tolower(stock),"sem.yrbin", sep='.'))
+#' lm.param.sem <- vector('list',length(lm.obj))
+#' names(lm.param.sem) <- names(lm.obj)
+#' 
+#' for (sem in names(lm.param.sem))
+#' {
+#'   # sem <- names(lm.param.sem)[1]
+#'   x <- lm.obj[[sem]]
+#'   lm.mat <- data.frame(do.call(rbind, 
+#'                                lapply(x, function(y) {
+#'                                  c( y$coefficients["log(LENGTH)"],
+#'                                     y$coefficients["(Intercept)"]
+#'                                  )
+#'                                } ))
+#'   )
+#'   colnames(lm.mat) <- c('Log.length','Intercept')
+#'   lm.mat$YrBin <- rownames(lm.mat)
+#'   rownames(lm.mat) <- NULL
+#'   lm.mat$Semester <- sem  
+#'   lm.param.sem[[sem]] <- lm.mat
+#' }
+#' lm.params <- do.call(rbind, lm.param.sem)
+#' rownames(lm.params) <- NULL
+#' 
+#' ### Create LW_Params table
+#' lw.params <- tibble(lm.params) %>%
+#'   mutate(
+#'     ALPHA = exp(lm.params$Intercept), .before=1
+#'   ) %>%
+#'   select(-Intercept) %>%
+#'   rename(BETA = Log.length) %>%
+#'   unite("SOURCE", Semester:YrBin, remove=FALSE) %>%
+#'   mutate(LW_TYPE = 'SEMESTER',
+#'          LW_ID = 1:n(),
+#'          .before = YrBin
+#'   ) %>%
+#'   mutate(YEAR_START = c(1989, 2002, 2012, 1989, 2002, 2012), # 1989 start years were calculated with data from 1992-2001 and used to fill 1989-1991 since no samples available for these early years
+#'          YEAR_END = c(2001, 2011, 2021, 2001, 2011, 2021),
+#'          MONTH_START = c(1,1,1,7,7,7),
+#'          MONTH_END = c(6,6,6,12,12,12))
+#' 
+#' fillCF(species_itis = 167687,
+#'  stock_abbrev = "SOUTH",
+#'  assessment_abbrev = "RT",
+#'  assessment_year = 2022,
+#'  data_years = 1989:2021,
+#'  sex_type = "NONE",
+#'  tab_regions = list(stat_areas = list(c('614', '615', '621', '622', '623', '624', '625', '626', '627', '628', '629', '631', '632', '633', '634', '635', '636', '637', '638', '639', '640'))),
+#'  tab_lw = data.frame(ALPHA = lw.params$ALPHA,
+#' BETA = lw.params$BETA,
+#' SOURCE = lw.params$SOURCE,
+#' LW_TYPE = lw.params$LW_TYPE,
+#' LW_ID = lw.params$LW_ID, 
+#' MONTH_START = lw.params$MONTH_START,
+#' MONTH_END = lw.params$MONTH_END,
+#' YEAR_START = lw.params$YEAR_START,
+#' YEAR_END = lw.params$YEAR_END),
+#'  tab_blocks = list(nespp4 = c('3350', '3351', '3352', '3353', '3355'),
+#'  block_opt = "SEMESTER",
+#'  autofillLW = TRUE),
+#' outfile =  here::here("test_SOUTH_BSB_CFfill"))
+#' 
+#' originalBLOCKS <- read.csv(here::here("Data_workflow", "BSBtest_CF_Specification.BLOCKS.167687.SOUTH.csv"))
+#' originalBLOCKS$NESPP4 <- as.character(originalBLOCKS$NESPP4)
+#' originalBLOCKS$REGION_ID <- as.character(originalBLOCKS$REGION_ID)
+#' originalBLOCKS$LW_ID <- as.character(originalBLOCKS$LW_ID)
+#' library(readxl)
+#' testBLOCKS <- read_excel(here::here("test_SOUTH_BSB_CFfill.xlsx"), sheet = "BLOCKS")
+#' # testBLOCKS$REGION_ID[nrow(testBLOCKS)] <- "2" # If uncommented makes 1 row not match in testBLOCKS, shows up in tidylog below
+#' library(tidylog)
+#' left_join(testBLOCKS, originalBLOCKS, by = colnames(originalBLOCKS))
+
+fillCF <- function(species_itis = NULL,
+                   stock_abbrev = NULL, 
+                   assessment_abbrev = NULL,
+                   assessment_year = NULL,
+                   data_years = NULL,
+                   sex_type = "NONE",
+                   tab_blocks = list(autofillLW = FALSE,
+                                     mkt_missing = NULL,
+                                     check_missing = FALSE),
+                   tab_prorate = NULL,
+                   tab_regions = NULL,
+                   tab_lw = NULL,
+                   tab_sex = NULL,
+                   outfile = NULL){
+  
+  
+  
+  # Set default list arguments that are not NULL 
+  if(is.null(tab_blocks$autofillLW)){ # If setting is not provided, ensure that the default is FALSE
+    tab_blocks$autofillLW = FALSE
+  }
+  if(is.null(tab_blocks$check_missing) == TRUE){ # If setting is not provided, ensure that the default is FALSE
+    tab_blocks$check_missing = FALSE
+  }
+  if(is.null(tab_blocks$mkt_missing) == FALSE){ # If manipulating years with missing market categories, check_missing must be TRUE
+    tab_blocks$check_missing == TRUE
+  } 
+    
+  # PRORATE tab
+  prorate <- rep("Y", length(data_years)) %>% cbind(data_years)
+  colnames(prorate) <- c("PRORATE_COMBINED", "YEAR")
+  if(is.null(tab_prorate$turnOff) == FALSE){
+    prorate[which(prorate[,"YEAR"] %in% tab_prorate$turnOff),"PRORATE_COMBINED"] <- "N" # Don't prorate specified years
+  }
+  
+  tab_PRORATE <- cbind(SPECIES_ITIS = species_itis,
+        STOCK_ABBREV = stock_abbrev,
+        SEX_TYPE = sex_type,
+        ASSESSMENT_ABBREV = assessment_abbrev,
+        SA_YEAR = assessment_year, 
+        YEAR = prorate[,"YEAR"],
+        PRORATE_COMBINED = prorate[,"PRORATE_COMBINED"])
+  
+  
+  # REGIONS tab
+  if(is.null(tab_regions$stat_area) == FALSE){ # Stat areas grouped by region (default option)
+    AREA <- unlist(tab_regions$stat_areas)
+    YEAR <- data_years
+    
+    REGION_ID <- NULL
+    for(iregion in 1:length(tab_regions$stat_areas)){
+      REGION_ID <- c(REGION_ID, rep(iregion, length(tab_regions$stat_areas[[iregion]])))
+    }
+    region_table <- cbind(AREA, REGION_ID) %>% as.data.frame()
+    
+    tab_REGIONS <- expand.grid(AREA = unlist(tab_regions$stat_areas), YEAR = data_years) %>% 
+      full_join(., region_table, by = "AREA") %>%
+      cbind(SPECIES_ITIS = species_itis,
+            STOCK_ABBREV = stock_abbrev,
+            SEX_TYPE = sex_type,
+            ASSESSMENT_ABBREV = assessment_abbrev,
+            SA_YEAR = assessment_year) %>%
+      relocate(c(REGION_ID, AREA, YEAR), .after = SA_YEAR) #!!! need to test with multiple regions, code works but not sure if all region 1 should be together followed by all region 2 for subsequent processing
+    
+  } else if(is.null(tab_regions$region_custom) == FALSE){ # Custom region ID, stat area, year grouping provided
+    tab_REGIONS <- cbind(SPECIES_ITIS = species_itis,
+                         STOCK_ABBREV = stock_abbrev,
+                         SEX_TYPE = sex_type,
+                         ASSESSMENT_ABBREV = assessment_abbrev,
+                         SA_YEAR = assessment_year, 
+                         REGION_ID = tab_regions$region_custom[,"REGION_ID"],
+                         AREA = tab_regions$region_custom[,"AREA"],
+                         YEAR = tab_regions$region_custom[,"YEAR"])
+  }
+  tab_REGIONS$AREA <- as.character(tab_REGIONS$AREA) # So type match when setting up blocks
+  
+  
+  # LW_PARAMS tab
+  tab_LW_PARAMS <- cbind(SPECIES_ITIS = species_itis,
+                         STOCK_ABBREV = stock_abbrev,
+                         SEX_TYPE = sex_type,
+                         ASSESSMENT_ABBREV = assessment_abbrev,
+                         SA_YEAR = assessment_year, 
+                         ALPHA = tab_lw[,"ALPHA"],
+                         BETA = tab_lw[,"BETA"],
+                         SOURCE = tab_lw[,"SOURCE"],
+                         LW_TYPE = tab_lw[,"LW_TYPE"],
+                         LW_ID = tab_lw[,"LW_ID"],
+                         MONTH_START = tab_lw[,"MONTH_START"],
+                         MONTH_END = tab_lw[,"MONTH_END"],
+                         YEAR_START = tab_lw[,"YEAR_START"],
+                         YEAR_END = tab_lw[,"YEAR_END"]) %>% 
+    as.data.frame() 
+  
+  # BLOCKS tab
+  if(tab_blocks$block_opt == "MONTH") { # Implement monthly blocks for each market category and year
+    tempBlock <- expand.grid(YEAR = data_years,
+                             NESPP4 = tab_blocks$nespp4,
+                             REGION_ID = unique(tab_REGIONS$REGION_ID),
+                             BLOCK_TYPE = "CUSTOM",
+                             MONTH_START = as.numeric(1:12)) %>% 
+      mutate(MONTH_END = MONTH_START)
+  } else if(tab_blocks$block_opt == "QUARTER"){ # Implement quarterly blocks for each market category and year
+    tempBlock <- expand.grid(YEAR = data_years,
+                             NESPP4 = tab_blocks$nespp4,
+                             REGION_ID = unique(tab_REGIONS$REGION_ID),
+                             BLOCK_TYPE = "QUARTER",
+                             MONTH_START = c(1,4,7,10)) %>% 
+      mutate(MONTH_END = MONTH_START+2)
+  } else if(tab_blocks$block_opt == "SEMESTER"){ # Implement semester blocks for each market category and year
+    tempBlock <- expand.grid(YEAR = data_years,
+                             NESPP4 = tab_blocks$nespp4,
+                             REGION_ID = unique(tab_REGIONS$REGION_ID),
+                             BLOCK_TYPE = "SEMESTER",
+                             MONTH_START = c(1,7)) %>% 
+      mutate(MONTH_END = MONTH_START+5)
+  } else if(tab_blocks$block_opt == "ANNUAL"){ # Implement a single annual block for each market category and year
+    tempBlock <- expand.grid(YEAR = data_years,
+                             NESPP4 = tab_blocks$nespp4,
+                             REGION_ID = unique(tab_REGIONS$REGION_ID),
+                             BLOCK_TYPE = "ANNUAL",
+                             MONTH_START = c(1),
+                             MONTH_END = c(12)) 
+  } else if(tab_blocks$block_opt == "checkLENGTHS" | tab_blocks$check_missing == TRUE){ 
+    ## Read in ages and lengths if 1) blocking should be done based on length availability, or 2) code should check for missing market categories in data_years
+    
+    # Connect to oracle
+    connection <- dbConnect(drv = dbDriver("Oracle"),
+                            username = rstudioapi::askForPassword("Oracle user name"), 
+                            password = rstudioapi::askForPassword("Oracle password"),
+                            dbname = rstudioapi::askForPassword("Oracle database name"))
+    
+    # Pull age information by species, stock, and year
+    checkAges <- ROracle::dbGetQuery(connection, statement = paste0("SELECT * FROM stockeff_pre_prod.mv_cf_stock_data_age_o WHERE species_itis = ", species_itis, " AND stock_abbrev = '", stock_abbrev,"'", " AND year >= ", data_years[1], " AND year <= ", data_years[length(data_years)]))
+    
+    # Pull lengths by species, stock, and year
+    checkLengths <- ROracle::dbGetQuery(connection, statement = paste0("SELECT * FROM stockeff_pre_prod.mv_cf_stock_data_length_o WHERE species_itis = ", species_itis, " AND stock_abbrev = '", stock_abbrev,"'", " AND year >= ", data_years[1], " AND year <= ", data_years[length(data_years)])) %>%
+      filter(NESPP4 %in% paste0("0",tab_blocks$nespp4)) %>% # Add 0 at start of NESPP4 code and filter only the selected market categories
+      left_join(.,tab_REGIONS[,c("AREA", "REGION_ID", "YEAR")], by = c("AREA", "YEAR")) %>%
+      mutate(SEM = case_when(QTR %in% c(1,2) ~ 1,
+                             QTR %in% c(3,4) ~ 2))
+    
+    if(tab_blocks$check_missing == TRUE){
+      regions_missing <- checkLengths %>% filter(is.na(REGION_ID) == TRUE) %>% select(YEAR, MONTH, AREA, NESPP4, LENGTH, NO_AT_LENGTH)
+      if(nrow(regions_missing) == 0){ # If no regions missing, also check for missing market categories
+        mkt_missing_yr <- expand.grid(YEAR = data_years,
+                                      QTR = as.character(1:4),
+                                      NESPP4 = paste0("0",tab_blocks$nespp4)) %>% 
+          mutate(SEM = case_when(QTR %in% c(1,2) ~ 1,
+                                 QTR %in% c(3,4) ~ 2)) %>%
+          left_join({checkLengths %>% group_by(YEAR, NESPP4) %>% summarise(LENGTHS_ANN = sum(NO_AT_LENGTH, na.rm = TRUE))}) %>% # NAs in LENGTHS_ANN when no lengths for that year/NESPP4 grouping
+          group_by(YEAR, NESPP4) %>% 
+          mutate(mktComplete = ifelse(any(is.na(LENGTHS_ANN) == TRUE), FALSE, TRUE)) %>% 
+          filter(mktComplete == FALSE) %>% # Pull out market categories that are missing by year
+          distinct(YEAR,NESPP4,LENGTHS_ANN,mktComplete) %>%
+          arrange(YEAR)
+      } else{ # If region missing, return a warning and do not check for missing market categories
+        mkt_missing_yr <- "Not calculated due to missing regions, check that tab_regions$stat_area input is complete."
+      }
+
+    }
+    
+    # Disconnect from database
+    dbDisconnect(connection)
+    
+    if(tab_blocks$block_opt == "checkLENGTHS"){
+      # Check lengths
+      defineBlocks <- expand.grid(YEAR = data_years,
+                                  QTR = as.character(1:4),
+                                  NESPP4 = paste0("0",tab_blocks$nespp4)) %>% 
+        mutate(SEM = case_when(QTR %in% c(1,2) ~ 1,
+                               QTR %in% c(3,4) ~ 2)) %>%
+        # Sum lengths by quarter
+        left_join(.,{checkLengths %>% group_by(YEAR, NESPP4, QTR) %>% summarise(LENGTHS_QTR = sum(NO_AT_LENGTH))}) %>%
+        left_join({checkLengths %>% group_by(YEAR, NESPP4, SEM) %>% summarise(LENGTHS_SEM = sum(NO_AT_LENGTH))}) %>%
+        left_join({checkLengths %>% group_by(YEAR, NESPP4) %>% summarise(LENGTHS_ANN = sum(NO_AT_LENGTH))}) %>%
+        
+        # Replace NAs with 0s used in checking complete market sampling
+        mutate(LENGTHS_QTR = ifelse(is.na(LENGTHS_QTR), 0, LENGTHS_QTR),
+               LENGTHS_SEM = ifelse(is.na(LENGTHS_SEM), 0, LENGTHS_SEM),
+               LENGTHS_ANN = ifelse(is.na(LENGTHS_ANN), 0, LENGTHS_ANN)) %>%
+        group_by(YEAR) %>% mutate(mktComplete = ifelse(any(LENGTHS_ANN == 0), FALSE, TRUE)) %>%
+        group_by(YEAR, NESPP4, SEM, QTR) %>%
+        dplyr::summarise(lengths_QTR = unique(LENGTHS_QTR), lengths_SEM = unique(LENGTHS_SEM), lengths_ANN = unique(LENGTHS_ANN), REGION_ID = unique(REGION_ID)) %>% # Collapse duplicates by month
+        group_by(YEAR, NESPP4) %>%
+        summarise(BLOCK_TYPE = case_when((sum(lengths_QTR >= tab_blocks$minLenSample) == 4) ~ "QUARTER", # Assign block
+                                         (sum(lengths_SEM >= tab_blocks$minLenSample) == 4) ~ "SEMESTER", #!!1 doesn't work, lengths_SEM needs to check if sufficient lengths in both semester, currently if <4 quarters sampled then won't do annual blocking even if # of lengths is correct, need to add expand.grid at beginning like Charles' code did in order to guarantee 4 months of info
+                                         (sum(lengths_ANN >= tab_blocks$minLenSample) == 4) ~ "ANNUAL"), # BLOCK_TYPE = NA if 1+ market category not sampled in a given year
+                  REGION_ID = unique(REGION_ID)) # Retain for final tab_BLOCKS formatting
+      
+      quarters <- expand.grid(BLOCK_TYPE = "QUARTER",
+                              MONTH_range = c("1_3", "4_6", "7_9", "10_12")) # Month range for each quarter
+      semesters <- expand.grid(BLOCK_TYPE = "SEMESTER",
+                               MONTH_range = c("1_6", "7_12")) # Month range for each semester
+      annual <- expand.grid(BLOCK_TYPE = "ANNUAL",
+                            MONTH_range = c("1_12"))
+      months <- rbind(quarters, semesters, annual)
+      
+      # Expand starting and end month for each block type
+      tempBlock <- full_join(defineBlocks, months, by = "BLOCK_TYPE", relationship = "many-to-many") %>% 
+        separate_wider_delim(MONTH_range, delim = "_", names = c("MONTH_START", "MONTH_END"))
+      
+    } # End statement to define checkLengths blocks
+    
+    } # End tab_blocks$opt and tab_blocks$check_missing statements
+  
+  # Handle incomplete market category
+  if(tab_blocks$mkt_missing == "drop"){ # Drop years that didn't sample a market category
+    tempBlock <- tempBlock %>% filter(YEAR %in% mkt_missing_yr$YEAR == FALSE)
+  } 
+  
+  # Auto-fill LW_ID using month and year range provide with each L-W relationship(if no option satisfied then return NA and must revisit by hand)
+  tempBlock$LW_ID <- NA
+  # Fill based on specified LW months
+  noAnnual <- tab_LW_PARAMS %>% filter(LW_TYPE != "ANNUAL") # Handle separately, otherwise risk using ANNUAL L-W for all blocks
+  for(iLW in 1:nrow(noAnnual)){
+    tempBlock$LW_ID[which(as.numeric(tempBlock$MONTH_START) >= as.numeric(tab_LW_PARAMS$MONTH_START[iLW]) & # Check if months fully overlap with a LW definition: 
+                            as.numeric(tempBlock$MONTH_START) <= as.numeric(tab_LW_PARAMS$MONTH_END[iLW]) & 
+                            as.numeric(tempBlock$MONTH_END) <= as.numeric(tab_LW_PARAMS$MONTH_END[iLW]) &
+                            as.numeric(tempBlock$YEAR) %in% tab_LW_PARAMS$YEAR_START[iLW]:tab_LW_PARAMS$YEAR_END[iLW]
+                            )] <- tab_LW_PARAMS$LW_ID[iLW] 
+  }
+  # Fill LW_ID with exact month and year matches
+  tempBlock$LW_ID <- NA
+  for(iLW in 1:nrow(noAnnual)){
+    tempBlock$LW_ID[which(as.numeric(tempBlock$MONTH_START) == as.numeric(tab_LW_PARAMS$MONTH_START[iLW]) &
+                          as.numeric(tempBlock$MONTH_END) == as.numeric(tab_LW_PARAMS$MONTH_END[iLW]) &
+                          as.numeric(tempBlock$YEAR) %in% tab_LW_PARAMS$YEAR_START[iLW]:tab_LW_PARAMS$YEAR_END[iLW])] <- tab_LW_PARAMS$LW_ID[iLW]
+  }
+  if(tab_blocks$autofillLW == TRUE){
+    # Fill remaining LW_ID when block months entirely overlap LW months AND fall within LW year range, if no LW_IDs meet this condition then the ID remains an NA
+    for(iLW in 1:nrow(noAnnual)){
+      tempBlock$LW_ID[which(is.na(tempBlock$LW_ID) & # Of remaining rows with no LW_ID assigned check the following:
+                              as.numeric(tempBlock$MONTH_START) >= as.numeric(tab_LW_PARAMS$MONTH_START[iLW]) & # Check if months fully overlap with a LW definition: 
+                              as.numeric(tempBlock$MONTH_START) <= as.numeric(tab_LW_PARAMS$MONTH_END[iLW]) & 
+                              as.numeric(tempBlock$MONTH_END) <= as.numeric(tab_LW_PARAMS$MONTH_END[iLW]) &
+                              as.numeric(tempBlock$YEAR) %in% tab_LW_PARAMS$YEAR_START[iLW]:tab_LW_PARAMS$YEAR_END[iLW])] <- tab_LW_PARAMS$LW_ID[iLW] # Check if year falls within LW definition, if yes assign that LW_ID
+    }
+  }
+  
+  # Fill annual LW if available
+  if("ANNUAL" %in% tab_LW_PARAMS$LW_TYPE){
+    tempBlock$LW_ID[which(tempBlock$MONTH_START == 1 & tempBlock$MONTH_END == 12)] <- tab_LW_PARAMS$LW_ID[which(tab_LW_PARAMS$LW_TYPE == "ANNUAL")]
+  }
+  
+  # Finish block formatting
+  tab_BLOCKS <- tempBlock %>% group_by(YEAR, NESPP4, REGION_ID) %>% 
+    mutate(BLOCK_ID = seq_along(YEAR)) %>% # Add the block ID numbers
+    cbind(SPECIES_ITIS = species_itis,
+          STOCK_ABBREV = stock_abbrev,
+          SEX_TYPE = sex_type,
+          ASSESSMENT_ABBREV = assessment_abbrev,
+          SA_YEAR = assessment_year)  %>% as.data.frame() %>%
+    relocate(c(YEAR, NESPP4, REGION_ID, BLOCK_ID, BLOCK_TYPE, MONTH_START, MONTH_END, LW_ID), .after = SA_YEAR)
+  
+  # If specification drops some market categories in some years then drop here
+  if(is.null(tab_blocks$dropMarketYR) == FALSE){ 
+    tab_BLOCKS <- tab_BLOCKS  %>% 
+      full_join(., tab_blocks$dropMarketYR, by = c("NESPP4", "YEAR")) %>%
+      replace_na(list(drop_market=FALSE)) %>%
+      filter(drop_market == FALSE) %>% # Keep market categories in years where not dropped 
+      DataExplorer::drop_columns("drop_market")
+  }
+  
+  # SEX_TYPE tab
+  start <- head(data_years, n=1)
+  end <- tail(data_years, n=1)
+  if(is.null(tab_sex$start_yr) == FALSE){
+    start <- tab_sex$start_yr
+  }
+  if(is.null(tab_sex$end_yr) == FALSE){
+    end <- tab_sex$end_yr
+  }
+  
+  tab_SEX_TYPE <- cbind(SPECIES_ITIS = species_itis,
+                        STOCK_ABBREV = stock_abbrev,
+                        ASSESSMENT_ABBREV = assessment_abbrev,
+                        SA_YEAR = assessment_year,
+                        CF_START_YEAR = start,
+                        CF_END_YEAR = end,
+                        SEX_TYPE = sex_type)
+  
+  
+  # ALK_HOLES tab - optional, can use stockEff auto-fill
+  tab_ALK_HOLES <- data.frame(matrix(vector(), 0, 16, dimnames = list(c(), c("SPECIES_ITIS", "STOCK_ABBREV", "SEX_TYPE", "YEAR", "REGION_ID", "BLOCK_ID", "LENGTH_UOM", "LENGTH", "LENGTH", "AGE_UOM",
+                                                                             "AGE", "NO_AT_AGE", "NESPP4", "ASSESSMENT_ABBREV", "SA_YEAR", "SOURCE"))),
+                              stringsAsFactors = FALSE)
+
+  # EXCLUSIONS tab - optional if some data should be excluded
+  tab_EXCLUSIONS <- data.frame(matrix(vector(), 0, 21, dimnames = list(c(), c("SPECIES_ITIS", "STOCK_ABBREV", "SEX_TYPE", "ASSESSMENT_ABBREV", "SA_YEAR", "YEAR", "NESPP4", "AREA",
+                                                                              "PORT", "MONTH", "DAY", "CATDISP", "SEX", "LINK", "BIOSAMP_SOURCE", "LENGTH", "LENGTH_UOM",
+                                                                              "AGESTRCT", "AGE", "AGE_UOM", "REASON"))),
+                               stringsAsFactors = FALSE)
+  
+  # LENGTH_IMPUTATIONS tab - optional, provide imputations for market category/region/block/length if missing numbers at length
+  tab_LENGTH_IMPUTATIONS <- data.frame(matrix(vector(), 0, 12, dimnames = list(c(), c("STOCK_ABBREV", "SEX_TYPE", "ASSESSMENT_ABBREV", "SA_YEAR", "YEAR", "NESPP4", "REGION_ID",
+                                                                                      "BLOCK_ID", "LENGTH_UOM", "LENGTH", "NO_AT_LENGTH", "SOURCE"))),
+                                       stringsAsFactors = FALSE)
+  
+  ## Write tables to tabs of CF excel template
+  
+  # Remove temporary month and year start/end columns used to auto-assign LW relationships to blocks
+  tab_LW_PARAMS <- tab_LW_PARAMS %>% as.data.frame() %>% drop_columns(c("MONTH_START", "MONTH_END", "YEAR_START", "YEAR_END"))  %>% as.data.frame()
+  
+  # Generate list of required tabs to output
+  write_list <- list("BLOCKS" = as.data.frame(tab_BLOCKS),
+                     "PRORATE" = as.data.frame(tab_PRORATE),
+                     "REGIONS" = as.data.frame(tab_REGIONS),
+                     "LW_PARAMS" = tab_LW_PARAMS,
+                     "SEX_TYPE" = as.data.frame(tab_SEX_TYPE),
+                     "ALK_HOLES" = as.data.frame(tab_ALK_HOLES), # Last 3 tabs are placeholders that can be filled externally to this script as needed
+                     "EXCLUSIONS" = as.data.frame(tab_EXCLUSIONS),
+                     "LENGTH_IMPUTATIONS" = as.data.frame(tab_LENGTH_IMPUTATIONS))
+  
+  # Actually write to template
+  library("writexl")
+  write_xlsx(write_list, paste0(outfile, ".xlsx")) # Drop information used in setting up tab_BLOCKS but not required in tab_LW_PARAMS before writing to template
+  
+  # Return
+  return_list <- list(mkt_missing_yr = mkt_missing_yr,   # Vector of years where 1+ market categories were not sampled
+                      regions_missing = regions_missing) # Table of AREAs that appear in oracle data but not in tab_regions$stat_area argument for this function (may be missing blocking)
+  
+  return(return_list) 
+}
+
+