diff --git a/R/RcppExports.R b/R/RcppExports.R
index 26b075b..ca2a353 100644
--- a/R/RcppExports.R
+++ b/R/RcppExports.R
@@ -2,6 +2,5 @@
 # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
 
 calculate_log_probability <- function(kmer_genus_count, word_specific_priors, genus_counts) {
-    .Call(`_phylotypr_calculate_log_probability`, kmer_genus_count, word_specific_priors, genus_counts)
+  .Call(`_phylotypr_calculate_log_probability`, kmer_genus_count, word_specific_priors, genus_counts)
 }
-
diff --git a/benchmarking/benchmark_str.R b/benchmarking/benchmark_str.R
index 606caaa..cfcdcc2 100644
--- a/benchmarking/benchmark_str.R
+++ b/benchmarking/benchmark_str.R
@@ -28,7 +28,7 @@ n_kmers <- stringi::stri_length(sequence) - 8 + 1
 
 b_substr <- function() {
   seq_kmers <- character(length = n_kmers)
-  for(i in seq_along(1:n_kmers)){
+  for (i in seq_along(1:n_kmers)) {
     seq_kmers[i] <- substr(sequence, i, i + 8 - 1)
   }
 }
@@ -36,17 +36,17 @@ b_substr <- function() {
 b_substring <- function() {
   substring(sequence, 1:n_kmers, 8:1500)
 }
-i_substring <- function(){
+i_substring <- function() {
   stringi::stri_sub(sequence, 1:n_kmers, 8:1500)
 }
-r_substring <- function(){
+r_substring <- function() {
   stringr::str_sub(sequence, 1:n_kmers, 8:1500)
 }
 
 microbenchmark(b_substr(), b_substring(), i_substring(), r_substrin)
 
-#toupper and tolower
-#seq_to_base4
+# toupper and tolower
+# seq_to_base4
 sequence_upper <- sequence
 sequence_lower <- tolower(sequence)
 
@@ -69,44 +69,46 @@ r_toupper <- function(x) {
   stringr::str_to_upper(x)
 }
 
-microbenchmark(b_toupper(sequence_upper),
-               b_toupper(sequence_lower),
-               b_tolower(sequence_upper),
-               b_tolower(sequence_lower),
-
-               i_toupper(sequence_upper),
-               i_toupper(sequence_lower),
-               i_tolower(sequence_upper),
-               i_tolower(sequence_lower),
-
-               r_toupper(sequence_upper),
-               r_toupper(sequence_lower),
-               r_tolower(sequence_upper),
-               r_tolower(sequence_lower))
+microbenchmark(
+  b_toupper(sequence_upper),
+  b_toupper(sequence_lower),
+  b_tolower(sequence_upper),
+  b_tolower(sequence_lower),
+  i_toupper(sequence_upper),
+  i_toupper(sequence_lower),
+  i_tolower(sequence_upper),
+  i_tolower(sequence_lower),
+  r_toupper(sequence_upper),
+  r_toupper(sequence_lower),
+  r_tolower(sequence_upper),
+  r_tolower(sequence_lower)
+)
 
 
 # want to replace non-atgc with a n character
 sequence_good <- sequence
 sequence_bad <- paste0(sequence_good, "R")
 
-b_gsub <- function(x){
+b_gsub <- function(x) {
   gsub(pattern = "[^ACGT]", replacement = "N", x = x)
 }
-i_replace <- function(x){
+i_replace <- function(x) {
   stringi::stri_replace_all_charclass(x, "[^ATGC]", "N")
 }
-r_replace <- function(x){
+r_replace <- function(x) {
   stringr::str_replace_all(x, "[^ATGC]", "N")
 }
 
-microbenchmark(b_gsub(sequence_good), b_gsub(sequence_bad),
-               i_replace(sequence_good), i_replace(sequence_bad),
-               r_replace(sequence_good), r_replace(sequence_bad))
+microbenchmark(
+  b_gsub(sequence_good), b_gsub(sequence_bad),
+  i_replace(sequence_good), i_replace(sequence_bad),
+  r_replace(sequence_good), r_replace(sequence_bad)
+)
 
 
 # need to convert A C G T to 0 1 2 3
 
-b_chartr <- function(){
+b_chartr <- function() {
   chartr("ACGT", "0123", sequence)
 }
 i_chartr <- function() {
diff --git a/benchmarking/benchmark_tab.R b/benchmarking/benchmark_tab.R
index 328d387..4e0dcfc 100644
--- a/benchmarking/benchmark_tab.R
+++ b/benchmarking/benchmark_tab.R
@@ -4,7 +4,7 @@ set.seed(19760620)
 fake_kmers <- sample(1:10, size = 50, replace = TRUE) + 20
 n_kmers <- 64
 
-k_table <- function(){
+k_table <- function() {
   kmer_table <- table(fake_kmers)
   kmer_values <- names(kmer_table) |> as.numeric()
   kmer_counts <- as.numeric(kmer_table)
@@ -13,7 +13,7 @@ k_table <- function(){
   kmer_vector
 }
 
-k_tabulate <- function(){
+k_tabulate <- function() {
   tabulate(fake_kmers, nbins = n_kmers)
 }
 
diff --git a/benchmarking/benchmarking.R b/benchmarking/benchmarking.R
index ffd0ae6..23fabf4 100644
--- a/benchmarking/benchmarking.R
+++ b/benchmarking/benchmarking.R
@@ -4,7 +4,7 @@ library(Rcpp)
 
 vector_grow_append <- function(x) {
   output <- numeric()
-  for(i in 1:x) {
+  for (i in 1:x) {
     output <- append(output, i^2)
   }
   output
@@ -12,7 +12,7 @@ vector_grow_append <- function(x) {
 
 list_grow_append <- function(x) {
   output <- vector(mode = "list")
-  for(i in 1:x) {
+  for (i in 1:x) {
     output <- append(output, i^2)
   }
   output
@@ -20,7 +20,7 @@ list_grow_append <- function(x) {
 
 vector_grow_c <- function(x) {
   output <- numeric()
-  for(i in 1:x) {
+  for (i in 1:x) {
     output <- c(output, i^2)
   }
   output
@@ -28,7 +28,7 @@ vector_grow_c <- function(x) {
 
 list_grow_c <- function(x) {
   output <- vector(mode = "list")
-  for(i in 1:x) {
+  for (i in 1:x) {
     output <- c(output, i^2)
   }
   output
@@ -36,7 +36,7 @@ list_grow_c <- function(x) {
 
 vector_grow_br <- function(x) {
   output <- numeric()
-  for(i in 1:x) {
+  for (i in 1:x) {
     output[i] <- i^2
   }
   output
@@ -44,7 +44,7 @@ vector_grow_br <- function(x) {
 
 list_grow_br <- function(x) {
   output <- vector(mode = "list")
-  for(i in 1:x) {
+  for (i in 1:x) {
     output[i] <- i^2
   }
   output
@@ -52,7 +52,7 @@ list_grow_br <- function(x) {
 
 vector_prealloc_sng <- function(x) {
   output <- numeric(x)
-  for(i in 1:x) {
+  for (i in 1:x) {
     output[i] <- i^2
   }
   output
@@ -60,7 +60,7 @@ vector_prealloc_sng <- function(x) {
 
 vector_prealloc_dbl <- function(x) {
   output <- numeric(x)
-  for(i in 1:x) {
+  for (i in 1:x) {
     output[[i]] <- i^2
   }
   output
@@ -68,7 +68,7 @@ vector_prealloc_dbl <- function(x) {
 
 list_prealloc_dbl <- function(x) {
   output <- vector(mode = "list", x)
-  for(i in 1:x) {
+  for (i in 1:x) {
     output[[i]] <- i^2
   }
   output
@@ -98,28 +98,32 @@ list_xapply <- function(x) {
   lapply(1:x, \(i) i^2)
 }
 
-vector_map <- function(x){
+vector_map <- function(x) {
   map_dbl(1:x, \(i) i^2)
 }
 
-list_map <- function(x){
+list_map <- function(x) {
   map(1:x, \(i) i^2)
 }
 
-vector_magrittr <- function(x){
+vector_magrittr <- function(x) {
   1:x %>% (\(i) i^2)()
 }
 
-list_magrittr <- function(x){
-  1:x %>% (\(i) i^2)() %>% as.list()
+list_magrittr <- function(x) {
+  1:x %>%
+    (\(i) i^2)() %>%
+    as.list()
 }
 
-vector_base <- function(x){
+vector_base <- function(x) {
   1:x |> (\(i) i^2)()
 }
 
-list_base <- function(x){
-  1:x |> (\(i) i^2)() |> as.list()
+list_base <- function(x) {
+  1:x |>
+    (\(i) i^2)() |>
+    as.list()
 }
 
 
@@ -127,39 +131,36 @@ n <- 1e4
 sourceCpp("benchmarking.cpp")
 
 mb_by_n <- function(n) {
-
-    microbenchmark(
-      vector_grow_append(n),
-      vector_grow_c(n),
-      vector_grow_br(n),
-      # vector_prealloc_sng(n),
-      vector_prealloc_dbl(n),
-      vector_colon(n),
-      vector_seq(n),
-      vector_xapply(n),
-      vector_rcpp(n),
-      vector_base(n),
-      vector_magrittr(n),
-      vector_map(n),
-
-      list_grow_append(n),
-      list_grow_c(n),
-      list_grow_br(n),
-      # list_prealloc_sng(n),
-      list_prealloc_dbl(n),
-      list_colon(n),
-      list_seq(n),
-      list_xapply(n),
-      list_rcpp(n),
-      list_base(n),
-      list_magrittr(n),
-      list_map(n)
-      ) %>%
+  microbenchmark(
+    vector_grow_append(n),
+    vector_grow_c(n),
+    vector_grow_br(n),
+    # vector_prealloc_sng(n),
+    vector_prealloc_dbl(n),
+    vector_colon(n),
+    vector_seq(n),
+    vector_xapply(n),
+    vector_rcpp(n),
+    vector_base(n),
+    vector_magrittr(n),
+    vector_map(n),
+    list_grow_append(n),
+    list_grow_c(n),
+    list_grow_br(n),
+    # list_prealloc_sng(n),
+    list_prealloc_dbl(n),
+    list_colon(n),
+    list_seq(n),
+    list_xapply(n),
+    list_rcpp(n),
+    list_base(n),
+    list_magrittr(n),
+    list_map(n)
+  ) %>%
     group_by(expr) %>%
     summarize(median_time = median(time)) %>%
     arrange(-median_time) %>%
     mutate(n = n)
-
 }
 
 ns <- c(1, 10, 100, 1000, 2500, 5000, 7500, 10000, 12500, 15000)
@@ -178,32 +179,31 @@ mb_data %>%
 
 
 mb_by_n(1e4) %>%
-  mutate(expr = str_replace(expr, "\\(n\\)", ""),
-         expr = str_replace(expr, "_", "-")) %>%
+  mutate(
+    expr = str_replace(expr, "\\(n\\)", ""),
+    expr = str_replace(expr, "_", "-")
+  ) %>%
   separate_wider_delim(expr, names = c("structure", "f"), delim = "-") %>%
   select(-n) %>%
   pivot_wider(names_from = structure, values_from = median_time)
 
 vector_get_one <- function(x) {
-
   index <- c(5, 10, 15, 20, 25, 30, 35, 40, 45, 50)
 
   x[5]
-
 }
 
 vector_get_one_named <- function(x) {
-
-  index <- c("ccx", "grs", "lmh", "bls", "gjy",
-             "eee", "arn", "kwp", "ffz", "kcn")
+  index <- c(
+    "ccx", "grs", "lmh", "bls", "gjy",
+    "eee", "arn", "kwp", "ffz", "kcn"
+  )
 
   x["lmh"]
-
 }
 
 
 vector_get_ten <- function(x) {
-
   index <- c(5, 10, 15, 20, 25, 30, 35, 40, 45, 50)
 
   x[5]
@@ -216,13 +216,13 @@ vector_get_ten <- function(x) {
   x[40]
   x[45]
   x[50]
-
 }
 
 vector_get_ten_named <- function(x) {
-
-  index <- c("ccx", "grs", "lmh", "bls", "gjy",
-             "eee", "arn", "kwp", "ffz", "kcn")
+  index <- c(
+    "ccx", "grs", "lmh", "bls", "gjy",
+    "eee", "arn", "kwp", "ffz", "kcn"
+  )
 
   x["ccx"]
   x["grs"]
@@ -238,38 +238,39 @@ vector_get_ten_named <- function(x) {
 
 
 vector_get_c <- function(x) {
-
   index <- c(5, 10, 15, 20, 25, 30, 35, 40, 45, 50)
 
-  c(x[5], x[10], x[15], x[20], x[25],
-    x[30], x[35], x[40], x[45], x[50])
-
+  c(
+    x[5], x[10], x[15], x[20], x[25],
+    x[30], x[35], x[40], x[45], x[50]
+  )
 }
 
 vector_get_c_named <- function(x) {
-
-  index <- c("ccx", "grs", "lmh", "bls", "gjy",
-             "eee", "arn", "kwp", "ffz", "kcn")
-
-  c(x["ccx"],  x["grs"],  x["lmh"],  x["bls"],
-    x["gjy"],  x["eee"],  x["arn"],  x["kwp"],
-    x["ffz"],  x["kcn"])
+  index <- c(
+    "ccx", "grs", "lmh", "bls", "gjy",
+    "eee", "arn", "kwp", "ffz", "kcn"
+  )
+
+  c(
+    x["ccx"], x["grs"], x["lmh"], x["bls"],
+    x["gjy"], x["eee"], x["arn"], x["kwp"],
+    x["ffz"], x["kcn"]
+  )
 }
 
 vector_get_index <- function(x) {
-
   index <- c(5, 10, 15, 20, 25, 30, 35, 40, 45, 50)
   x[index]
-
 }
 
 
 vector_get_index_named <- function(x) {
-
-  index <- c("ccx", "grs", "lmh", "bls", "gjy",
-             "eee", "arn", "kwp", "ffz", "kcn")
+  index <- c(
+    "ccx", "grs", "lmh", "bls", "gjy",
+    "eee", "arn", "kwp", "ffz", "kcn"
+  )
   x[index]
-
 }
 
 long_vector <- (1:1e4)^2
@@ -280,25 +281,27 @@ n <- expand_grid(letters, letters, letters) |>
 long_namedlist <- long_list
 names(long_namedlist) <- n[1:length(long_list)]
 
-microbenchmark(vector_get_one(long_vector),
-               vector_get_ten(long_vector),
-               vector_get_c(long_vector),
-               vector_get_index(long_vector),
-               vector_get_one(long_list),
-               vector_get_ten(long_list),
-               vector_get_c(long_list),
-               vector_get_index_named(long_list),
-               vector_get_one_named(long_namedlist),
-               vector_get_ten_named(long_namedlist),
-               vector_get_c_named(long_namedlist),
-               vector_get_index_named(long_namedlist)
-               ) %>%
+microbenchmark(
+  vector_get_one(long_vector),
+  vector_get_ten(long_vector),
+  vector_get_c(long_vector),
+  vector_get_index(long_vector),
+  vector_get_one(long_list),
+  vector_get_ten(long_list),
+  vector_get_c(long_list),
+  vector_get_index_named(long_list),
+  vector_get_one_named(long_namedlist),
+  vector_get_ten_named(long_namedlist),
+  vector_get_c_named(long_namedlist),
+  vector_get_index_named(long_namedlist)
+) %>%
   group_by(expr) %>%
   summarize(median_time = median(time)) %>%
   arrange(-median_time) %>%
-  mutate(expr = str_replace(expr, "\\((.*)\\)", " \\1"),
-         expr = str_replace(expr, "vector_", "")) %>%
+  mutate(
+    expr = str_replace(expr, "\\((.*)\\)", " \\1"),
+    expr = str_replace(expr, "vector_", "")
+  ) %>%
   separate_wider_delim(expr, names = c("f", "data"), delim = " ") %>%
   mutate(f = str_replace(f, "_named", "")) %>%
   pivot_wider(names_from = data, values_from = median_time)
-
diff --git a/benchmarking/benchmarking_df.R b/benchmarking/benchmarking_df.R
index a951d84..3f30317 100644
--- a/benchmarking/benchmarking_df.R
+++ b/benchmarking/benchmarking_df.R
@@ -11,92 +11,110 @@ set.seed(19760620)
 n_kmers <- 4^8
 n_genera <- 4000
 
-df_grow_rbind <- function(n){
+df_grow_rbind <- function(n) {
   df <- data.frame(kmer = NULL, genus = NULL, count = NULL)
-  for(i in 1:n) {
-    df <- rbind(df, list(kmer = sample(n_kmers, 1),
-                      genus = sample(n_genera, 1),
-                      count = sample(10, 1)))
+  for (i in 1:n) {
+    df <- rbind(df, list(
+      kmer = sample(n_kmers, 1),
+      genus = sample(n_genera, 1),
+      count = sample(10, 1)
+    ))
   }
   df
 }
 
-tbl_grow_rbind <- function(n){
+tbl_grow_rbind <- function(n) {
   df <- tibble(kmer = NULL, genus = NULL, count = NULL)
-  for(i in 1:n) {
-    df <- bind_rows(df, tibble_row(kmer = sample(n_kmers, 1),
-                         genus = sample(n_genera, 1),
-                         count = sample(10, 1)))
+  for (i in 1:n) {
+    df <- bind_rows(df, tibble_row(
+      kmer = sample(n_kmers, 1),
+      genus = sample(n_genera, 1),
+      count = sample(10, 1)
+    ))
   }
   df
 }
 
-dt_grow_rbind <- function(n){
+dt_grow_rbind <- function(n) {
   df <- data.table(kmer = NULL, genus = NULL, count = NULL)
-  for(i in 1:n) {
-    df <- rbindlist(list(df, list(kmer = sample(n_kmers, 1),
-                                  genus = sample(n_genera, 1),
-                                  count = sample(10, 1))))
+  for (i in 1:n) {
+    df <- rbindlist(list(df, list(
+      kmer = sample(n_kmers, 1),
+      genus = sample(n_genera, 1),
+      count = sample(10, 1)
+    )))
   }
   df
 }
 
-df_grow_index <- function(n){
+df_grow_index <- function(n) {
   df <- data.frame(kmer = 0, genus = 0, count = 0)
-  for(i in 1:n) {
-    df[i, ] <- list(kmer = sample(n_kmers, 1),
-                    genus = sample(n_genera, 1),
-                    count = sample(10, 1))
+  for (i in 1:n) {
+    df[i, ] <- list(
+      kmer = sample(n_kmers, 1),
+      genus = sample(n_genera, 1),
+      count = sample(10, 1)
+    )
   }
   df
 }
 
-tbl_grow_index <- function(n){
+tbl_grow_index <- function(n) {
   df <- tibble(kmer = 0, genus = 0, count = 0)
-  for(i in 1:n) {
-    df[i, ] <- tibble_row(kmer = sample(n_kmers, 1),
-                          genus = sample(n_genera, 1),
-                          count = sample(10, 1))
+  for (i in 1:n) {
+    df[i, ] <- tibble_row(
+      kmer = sample(n_kmers, 1),
+      genus = sample(n_genera, 1),
+      count = sample(10, 1)
+    )
   }
   df
 }
 
-df_prealloc_index <- function(n){
+df_prealloc_index <- function(n) {
   df <- data.frame(kmer = rep(0, n), genus = rep(0, n), count = rep(0, n))
-  for(i in 1:n) {
-    df[i, ] <- list(kmer = sample(n_kmers, 1),
-                    genus = sample(n_genera, 1),
-                    count = sample(10, 1))
+  for (i in 1:n) {
+    df[i, ] <- list(
+      kmer = sample(n_kmers, 1),
+      genus = sample(n_genera, 1),
+      count = sample(10, 1)
+    )
   }
   df
 }
 
-tbl_prealloc_index <- function(n){
+tbl_prealloc_index <- function(n) {
   df <- tibble(kmer = rep(0, n), genus = rep(0, n), count = rep(0, n))
-  for(i in 1:n) {
-    df[i, ] <- tibble_row(kmer = sample(n_kmers, 1),
-                          genus = sample(n_genera, 1),
-                          count = sample(10, 1))
+  for (i in 1:n) {
+    df[i, ] <- tibble_row(
+      kmer = sample(n_kmers, 1),
+      genus = sample(n_genera, 1),
+      count = sample(10, 1)
+    )
   }
   df
 }
 
-dt_prealloc_index_orig <- function(n){
+dt_prealloc_index_orig <- function(n) {
   df <- data.table(kmer = rep(0, n), genus = rep(0, n), count = rep(0, n))
-  for(i in 1:n) {
-    df[i, (colnames(df)) := list(sample(n_kmers, 1),
-                                 sample(n_genera, 1),
-                                 sample(10, 1))]
+  for (i in 1:n) {
+    df[i, (colnames(df)) := list(
+      sample(n_kmers, 1),
+      sample(n_genera, 1),
+      sample(10, 1)
+    )]
   }
   df
 }
 
-dt_prealloc_index_set <- function(n){
+dt_prealloc_index_set <- function(n) {
   df <- data.table(kmer = rep(0, n), genus = rep(0, n), count = rep(0, n))
-  for(i in 1:n) {
-    set(df, i, 1:3, list(sample(n_kmers, 1),
-                         sample(n_genera, 1),
-                         sample(10, 1)))
+  for (i in 1:n) {
+    set(df, i, 1:3, list(
+      sample(n_kmers, 1),
+      sample(n_genera, 1),
+      sample(10, 1)
+    ))
   }
   df
 }
@@ -142,13 +160,13 @@ matrix_predefine <- function(n) {
 
   m <- matrix(0, nrow = n_kmers, ncol = n_genera)
 
-  for(i in 1:n)  {
+  for (i in 1:n) {
     m[kmer[i], genus[i]] <- count[i]
   }
   m
 }
 
-matrix_sparseC_old <- function(n){
+matrix_sparseC_old <- function(n) {
   kmer <- sample(n_kmers, n, replace = TRUE)
   genus <- sample(n_genera, n, replace = TRUE)
   count <- sample(10, n, replace = TRUE)
@@ -156,21 +174,20 @@ matrix_sparseC_old <- function(n){
   m <- Matrix(0, nrow = n_kmers, ncol = n_genera)
   m <- as(m, "CsparseMatrix")
 
-  for(i in 1:n)  {
+  for (i in 1:n) {
     m[kmer[i], genus[i]] <- count[i]
   }
   m
-
 }
 
-matrix_sparseC <- function(n){
+matrix_sparseC <- function(n) {
   kmer <- sample(n_kmers, n, replace = TRUE)
   genus <- sample(n_genera, n, replace = TRUE)
   count <- sample(10, n, replace = TRUE)
   sparseMatrix(i = kmer, j = genus, x = count, repr = "C")
 }
 
-matrix_sparseR_old <- function(n){
+matrix_sparseR_old <- function(n) {
   kmer <- sample(n_kmers, n, replace = TRUE)
   genus <- sample(n_genera, n, replace = TRUE)
   count <- sample(10, n, replace = TRUE)
@@ -178,13 +195,13 @@ matrix_sparseR_old <- function(n){
   m <- Matrix(0, nrow = n_kmers, ncol = n_genera)
   m <- as(m, "RsparseMatrix")
 
-  for(i in 1:n)  {
+  for (i in 1:n) {
     m[kmer[i], genus[i]] <- count[i]
   }
   m
 }
 
-matrix_sparseR <- function(n){
+matrix_sparseR <- function(n) {
   kmer <- sample(n_kmers, n, replace = TRUE)
   genus <- sample(n_genera, n, replace = TRUE)
   count <- sample(10, n, replace = TRUE)
@@ -192,7 +209,7 @@ matrix_sparseR <- function(n){
 }
 
 
-matrix_sparseT_old <- function(n){
+matrix_sparseT_old <- function(n) {
   kmer <- sample(n_kmers, n, replace = TRUE)
   genus <- sample(n_genera, n, replace = TRUE)
   count <- sample(10, n, replace = TRUE)
@@ -200,14 +217,13 @@ matrix_sparseT_old <- function(n){
   m <- Matrix(0, nrow = n_kmers, ncol = n_genera)
   m <- as(m, "TsparseMatrix")
 
-  for(i in 1:n)  {
+  for (i in 1:n) {
     m[kmer[i], genus[i]] <- count[i]
   }
   m
-
 }
 
-matrix_sparseT <- function(n){
+matrix_sparseT <- function(n) {
   kmer <- sample(n_kmers, n, replace = TRUE)
   genus <- sample(n_genera, n, replace = TRUE)
   count <- sample(10, n, replace = TRUE)
@@ -219,30 +235,30 @@ Rcpp::sourceCpp("benchmarking_df.cpp")
 
 n <- 100
 
-microbenchmark(df_grow_rbind(n),
-               df_grow_index(n),
-               df_prealloc_index(n),
-               df_predefine(n),
-               tbl_grow_rbind(n),
-               tbl_grow_index(n),
-               tbl_prealloc_index(n),
-               tbl_predefine(n),
-               dt_grow_rbind(n),
-               dt_prealloc_index_orig(n),
-               dt_prealloc_index_set(n),
-               dt_predefine(n),
-               matrix_sparseC_old(n),
-               matrix_sparseR_old(n),
-               matrix_sparseT_old(n),
-               matrix_sparseC(n),
-               matrix_sparseR(n),
-               matrix_sparseT(n),
-               list_predefine(n),
-               vector_predefine(n),
-               matrix_predefine(n),
-               df_rcpp(n)
-
-               ) %>%
+microbenchmark(
+  df_grow_rbind(n),
+  df_grow_index(n),
+  df_prealloc_index(n),
+  df_predefine(n),
+  tbl_grow_rbind(n),
+  tbl_grow_index(n),
+  tbl_prealloc_index(n),
+  tbl_predefine(n),
+  dt_grow_rbind(n),
+  dt_prealloc_index_orig(n),
+  dt_prealloc_index_set(n),
+  dt_predefine(n),
+  matrix_sparseC_old(n),
+  matrix_sparseR_old(n),
+  matrix_sparseT_old(n),
+  matrix_sparseC(n),
+  matrix_sparseR(n),
+  matrix_sparseT(n),
+  list_predefine(n),
+  vector_predefine(n),
+  matrix_predefine(n),
+  df_rcpp(n)
+) %>%
   group_by(expr) %>%
   summarize(median_time = median(time)) %>%
   arrange(-median_time)
@@ -275,98 +291,98 @@ lst <- list_predefine(n)
 arr <- arrow_table(df)
 
 
-get_df_single <- function(){
-  df[which(df$kmer == 20),]
+get_df_single <- function() {
+  df[which(df$kmer == 20), ]
 }
 
-get_df_three <- function(){
-  df[which(df$kmer == 20 | df$kmer == 30 | df$kmer == 50),]
+get_df_three <- function() {
+  df[which(df$kmer == 20 | df$kmer == 30 | df$kmer == 50), ]
 }
 
-get_dt_single <- function(){
-  dt[which(kmer == 20),]
+get_dt_single <- function() {
+  dt[which(kmer == 20), ]
 }
 
-get_dt_three <- function(){
-  dt[which(kmer == 20 | kmer == 30 | kmer == 50),]
+get_dt_three <- function() {
+  dt[which(kmer == 20 | kmer == 30 | kmer == 50), ]
 }
 
-get_dt_singlek <- function(){
+get_dt_singlek <- function() {
   dt2[.(20)]
 }
 
-get_dt_threek <- function(){
-  dt2[.(c(20, 50, 30)),]
+get_dt_threek <- function() {
+  dt2[.(c(20, 50, 30)), ]
 }
 
-get_tbl_single <- function(){
+get_tbl_single <- function() {
   filter(tbl, kmer == 20)
 }
 
-get_tbl_three <- function(){
+get_tbl_three <- function() {
   filter(tbl, kmer == 20 | kmer == 30 | kmer == 50)
 }
 
-get_duck_single <- function(){
+get_duck_single <- function() {
   filter(duck, kmer == 20)
 }
 
-get_duck_three <- function(){
+get_duck_three <- function() {
   filter(duck, kmer == 20 | kmer == 30 | kmer == 50)
 }
 
-get_arrow_single <- function(){
+get_arrow_single <- function() {
   filter(arr, kmer == 20)
 }
 
-get_arrow_three <- function(){
+get_arrow_three <- function() {
   filter(arr, kmer == 20 | kmer == 30 | kmer == 50)
 }
 
-get_dbi_single <- function(){
+get_dbi_single <- function() {
   dbGetQuery(con, "SELECT * FROM duck WHERE kmer == 20")
 }
 
-get_dbi_three <- function(){
+get_dbi_three <- function() {
   dbGetQuery(con, "SELECT * FROM duck WHERE kmer == 20 OR kmer == 30 OR kmer == 50")
 }
 
-get_tbl_threeJ <- function(){
+get_tbl_threeJ <- function() {
   filter_table <- tibble(kmer = c(20, 30, 50))
   inner_join(tbl, filter_table, by = "kmer")
 }
 
-get_mfull_single <- function(){
-  mfull[20,]
+get_mfull_single <- function() {
+  mfull[20, ]
 }
 
-get_mfull_three <- function(){
-  mfull[c(20, 30, 50),]
+get_mfull_three <- function() {
+  mfull[c(20, 30, 50), ]
 }
 
 
-get_msparseT_single <- function(){
-  msparseT[20,]
+get_msparseT_single <- function() {
+  msparseT[20, ]
 }
 
-get_msparseT_three <- function(){
-  msparseT[c(20, 30, 50),]
+get_msparseT_three <- function() {
+  msparseT[c(20, 30, 50), ]
 }
 
-get_msparseR_single <- function(){
-  msparseR[20,]
+get_msparseR_single <- function() {
+  msparseR[20, ]
 }
 
-get_msparseR_three <- function(){
-  msparseR[c(20, 30, 50),]
+get_msparseR_three <- function() {
+  msparseR[c(20, 30, 50), ]
 }
 
-get_msparseC_single <- function(){
-  msparseC[20,]
+get_msparseC_single <- function() {
+  msparseC[20, ]
 }
 
-get_msparseC_three <- function(){
-  msparseC[c(20, 30, 50),]
+get_msparseC_three <- function() {
+  msparseC[c(20, 30, 50), ]
 }
 
 kmer_vec <- lst$kmer
@@ -375,81 +391,90 @@ count_vec <- lst$count
 
 get_vector_single <- function() {
   kmer_present <- kmer_vec == 20
-  list(kmer = kmer_vec[kmer_present],
-       genus = genus_vec[kmer_present],
-       count = count_vec[kmer_present])
+  list(
+    kmer = kmer_vec[kmer_present],
+    genus = genus_vec[kmer_present],
+    count = count_vec[kmer_present]
+  )
 }
 
 get_vector_threeOR <- function() {
   kmer_present <- kmer_vec == 20 | kmer_vec == 30 | kmer_vec == 50
-  list(kmer = kmer_vec[kmer_present],
-       genus = genus_vec[kmer_present],
-       count = count_vec[kmer_present])
+  list(
+    kmer = kmer_vec[kmer_present],
+    genus = genus_vec[kmer_present],
+    count = count_vec[kmer_present]
+  )
 }
 
 get_vector_threeIN <- function() {
   kmer_present <- kmer_vec %in% c(20, 30, 50)
 
-  list(kmer = kmer_vec[kmer_present],
-       genus = genus_vec[kmer_present],
-       count = count_vec[kmer_present])
+  list(
+    kmer = kmer_vec[kmer_present],
+    genus = genus_vec[kmer_present],
+    count = count_vec[kmer_present]
+  )
 }
 
 get_vector_str2lang <- function() {
   kmer_present <- eval(str2lang(paste(paste("kmer_vec == ", c(20, 30, 50)),
-                                      collapse = " | ")))
+    collapse = " | "
+  )))
   kmer_vec[kmer_present]
 }
 
 
-get_which_single <- function(){
+get_which_single <- function() {
   kmer_present <- which(kmer_vec == 20)
-  list(kmer = kmer_vec[kmer_present],
-       genus = genus_vec[kmer_present],
-       count = count_vec[kmer_present])
+  list(
+    kmer = kmer_vec[kmer_present],
+    genus = genus_vec[kmer_present],
+    count = count_vec[kmer_present]
+  )
 }
 
-get_which_three <- function(){
+get_which_three <- function() {
   kmer_present <- which(kmer_vec == 20 | kmer_vec == 30 | kmer_vec == 50)
-  list(kmer = kmer_vec[kmer_present],
-       genus = genus_vec[kmer_present],
-       count = count_vec[kmer_present])
+  list(
+    kmer = kmer_vec[kmer_present],
+    genus = genus_vec[kmer_present],
+    count = count_vec[kmer_present]
+  )
 }
 
 microbenchmark(get_df_single(),
-               get_duck_single(),
-               get_arrow_single(),
-               get_dt_single(),
-               get_dt_singlek(),
-               get_tbl_single(),
-               get_dbi_single(),
-               get_mfull_single(),
-               get_msparseT_single(),
-               get_msparseR_single(),
-               get_msparseC_single(),
-               # get_vector_single(),
-               get_which_single(),
-
-               get_df_three(),
-               get_dt_three(),
-               get_dt_threek(),
-               get_dbi_three(),
-               get_tbl_three(),
-               get_duck_three(),
-               get_arrow_three(),
-               # get_tbl_threeJ(),
-               get_mfull_three(),
-               get_msparseT_three(),
-               get_msparseR_three(),
-               get_msparseC_three(),
-               #get_vector_threeOR(),
-               #get_vector_threeIN(),
-               get_which_three(),
-               #get_vector_str2lang(),
-               times = 10
-               ) %>%
+  get_duck_single(),
+  get_arrow_single(),
+  get_dt_single(),
+  get_dt_singlek(),
+  get_tbl_single(),
+  get_dbi_single(),
+  get_mfull_single(),
+  get_msparseT_single(),
+  get_msparseR_single(),
+  get_msparseC_single(),
+  # get_vector_single(),
+  get_which_single(),
+  get_df_three(),
+  get_dt_three(),
+  get_dt_threek(),
+  get_dbi_three(),
+  get_tbl_three(),
+  get_duck_three(),
+  get_arrow_three(),
+  # get_tbl_threeJ(),
+  get_mfull_three(),
+  get_msparseT_three(),
+  get_msparseR_three(),
+  get_msparseC_three(),
+  # get_vector_threeOR(),
+  # get_vector_threeIN(),
+  get_which_three(),
+  # get_vector_str2lang(),
+  times = 10
+) %>%
   group_by(expr) %>%
   summarize(median_time = median(time)) %>%
   arrange(-median_time) %>%
   print(n = Inf)
-
diff --git a/benchmarking/benchmarking_joins.R b/benchmarking/benchmarking_joins.R
index a22f777..eef58d0 100644
--- a/benchmarking/benchmarking_joins.R
+++ b/benchmarking/benchmarking_joins.R
@@ -1,24 +1,31 @@
+animal_legs <- data.frame(
+  animal = c("cow", "fish", "chicken", "dog", "sheep"),
+  n_legs = c(4, 0, 2, 4, 4)
+)
 
-animal_legs <- data.frame(animal = c("cow", "fish", "chicken", "dog", "sheep"),
-                          n_legs = c(4, 0, 2, 4, 4))
-
-animal_sounds <- data.frame(animal = c("cow", "chicken", "cat", "sheep", "dog"),
-                            sounds = c("mooo", "cluck", "meow", "baaa", "bark"))
+animal_sounds <- data.frame(
+  animal = c("cow", "chicken", "cat", "sheep", "dog"),
+  sounds = c("mooo", "cluck", "meow", "baaa", "bark")
+)
 
-animal_pats_farm <- data.frame(animals = c("cow", "fish", "chicken", "dog", "cat", "sheep"),
-                               pat_has = c(T, F, T, T, T, T))
-#base R
-merge(animal_legs, animal_sounds, all = FALSE) #inner join
-merge(animal_legs, animal_sounds, all = FALSE, by = "animal") #inner join
-merge(animal_legs, animal_sounds, all = TRUE) #full join
-merge(animal_legs, animal_sounds, all.x = TRUE) #left join
-merge(animal_legs, animal_sounds, all.y = TRUE) #right join
+animal_pats_farm <- data.frame(
+  animals = c("cow", "fish", "chicken", "dog", "cat", "sheep"),
+  pat_has = c(T, F, T, T, T, T)
+)
+# base R
+merge(animal_legs, animal_sounds, all = FALSE) # inner join
+merge(animal_legs, animal_sounds, all = FALSE, by = "animal") # inner join
+merge(animal_legs, animal_sounds, all = TRUE) # full join
+merge(animal_legs, animal_sounds, all.x = TRUE) # left join
+merge(animal_legs, animal_sounds, all.y = TRUE) # right join
 
-merge(animal_legs, animal_pats_farm, all = FALSE,
-      by.x = "animal", by.y = "animals") #inner join
+merge(animal_legs, animal_pats_farm,
+  all = FALSE,
+  by.x = "animal", by.y = "animals"
+) # inner join
 
 
-#dplyr
+# dplyr
 inner_join(animal_legs, animal_sounds)
 inner_join(animal_legs, animal_sounds, by = "animal")
 inner_join(animal_legs, animal_sounds, by = join_by(animal))
@@ -27,31 +34,31 @@ full_join(animal_sounds, animal_legs, by = "animal")
 left_join(animal_legs, animal_sounds, by = "animal")
 right_join(animal_legs, animal_sounds, by = "animal")
 
-#inner_join(animal_legs, animal_pats_farm) #Inner join errors
+# inner_join(animal_legs, animal_pats_farm) #Inner join errors
 inner_join(animal_legs, animal_pats_farm, by = c("animal" = "animals"))
 inner_join(animal_legs, animal_pats_farm, by = join_by(animal == animals))
 
 inner_join(animal_legs, animal_pats_farm, by = join_by(animal == animals)) |>
   inner_join(x = _, animal_sounds, by = "animal")
 
-#%>%
+# %>%
 inner_join(animal_legs, animal_pats_farm, by = join_by(animal == animals)) %>%
   inner_join(., animal_sounds, by = "animal")
 
 
-#data.table
+# data.table
 animal_legs_dt <- data.table::data.table(animal_legs, key = "animal")
 animal_sounds_dt <- data.table::data.table(animal_sounds, key = "animal")
 
 
-#animal_legs[row, column, by, other arguments]
-animal_legs_dt[animal_sounds_dt, on = .(animal)] #right join
-animal_sounds_dt[animal_legs_dt, on = .(animal)] #left join
-animal_sounds_dt[animal_legs_dt, nomatch = NULL, on = .(animal)] #inner join
-animal_sounds_dt[animal_legs_dt, nomatch = NULL] #inner join
+# animal_legs[row, column, by, other arguments]
+animal_legs_dt[animal_sounds_dt, on = .(animal)] # right join
+animal_sounds_dt[animal_legs_dt, on = .(animal)] # left join
+animal_sounds_dt[animal_legs_dt, nomatch = NULL, on = .(animal)] # inner join
+animal_sounds_dt[animal_legs_dt, nomatch = NULL] # inner join
 
 uniq_animals <- unique(c(animal_sounds_dt$animal, animal_legs_dt$animal))
-animal_legs_dt[animal_sounds_dt[uniq_animals], on = .(animal)] #full join
+animal_legs_dt[animal_sounds_dt[uniq_animals], on = .(animal)] # full join
 
 animal_pats_farm_dt <- data.table::data.table(animal_pats_farm, key = "animals")
 
@@ -73,8 +80,7 @@ microbenchmark::microbenchmark(
   dt = {
     fasta_dt <- data.table::data.table(fasta_df, key = "id")
     genera_dt <- data.table::data.table(genera, key = "id")
-    fasta_dt[genera_dt, nomatch = NULL, on = .(id)] #inner join
+    fasta_dt[genera_dt, nomatch = NULL, on = .(id)] # inner join
   },
   dtA = fasta_dtA[genera_dtA, nomatch = NULL, on = .(id)]
-
 )
diff --git a/benchmarking/benchmarking_readfile.R b/benchmarking/benchmarking_readfile.R
index 4e5f861..0b8b5fb 100644
--- a/benchmarking/benchmarking_readfile.R
+++ b/benchmarking/benchmarking_readfile.R
@@ -1,12 +1,10 @@
-
 file <- "benchmarking/trainset19_072023.rdp/trainset19_072023.rdp.fasta"
 
 microbenchmark::microbenchmark(
   scan = scan(file, sep = "\n", what = character(), quiet = TRUE) |> stringi::stri_detect_regex("^>"),
   rl = readLines(file) |> stringi::stri_detect_regex("^>"),
   dt = data.table::fread(file, header = FALSE)$V1 |> stringi::stri_detect_regex("^>"),
-  r_l= readr::read_lines(file) |> stringi::stri_detect_regex("^>"),
+  r_l = readr::read_lines(file) |> stringi::stri_detect_regex("^>"),
   v_l = vroom::vroom_lines(file) |> stringi::stri_detect_regex("^>"),
   times = 10
-
 )
diff --git a/benchmarking/benchmarking_readtsv.R b/benchmarking/benchmarking_readtsv.R
index 1767346..f52871c 100644
--- a/benchmarking/benchmarking_readtsv.R
+++ b/benchmarking/benchmarking_readtsv.R
@@ -1,29 +1,37 @@
 taxonomy <- "benchmarking/trainset19_072023.rdp/trainset19_072023.rdp.tax"
 
 genera <- readr::read_tsv(taxonomy,
-                          col_names = c("accession", "taxonomy")) |>
+  col_names = c("accession", "taxonomy")
+) |>
   dplyr::mutate(taxonomy = stringi::stri_replace_all_regex(taxonomy, ";$", ""))
 
 
 microbenchmark::microbenchmark(
-  r.d = read.delim(taxonomy, header = FALSE,
-             col.names = c("accession", "taxonomy"),
-             colClasses = c("character", "character")) |>
+  r.d = read.delim(taxonomy,
+    header = FALSE,
+    col.names = c("accession", "taxonomy"),
+    colClasses = c("character", "character")
+  ) |>
     dplyr::mutate(taxonomy = stringi::stri_replace_last_regex(taxonomy, ";$", "")),
   r_t = readr::read_tsv(taxonomy,
-                  col_names = c("accession", "taxonomy"),
-                  col_types = readr::cols(.default = readr::col_character())) |>
+    col_names = c("accession", "taxonomy"),
+    col_types = readr::cols(.default = readr::col_character())
+  ) |>
     dplyr::mutate(taxonomy = stringi::stri_replace_last_regex(taxonomy, ";$", "")),
-  v = vroom::vroom(taxonomy, delim = "\t",
-               col_names = c("accession", "taxonomy"),
-               col_types = vroom::cols(.default = vroom::col_character())) |>
+  v = vroom::vroom(taxonomy,
+    delim = "\t",
+    col_names = c("accession", "taxonomy"),
+    col_types = vroom::cols(.default = vroom::col_character())
+  ) |>
     dplyr::mutate(taxonomy = stringi::stri_replace_last_regex(taxonomy, ";$", "")),
-  d.t = data.table::fread(taxonomy, sep = "\t", header = FALSE,
-                    col.names = c("accession", "taxonomy")) |>
+  d.t = data.table::fread(taxonomy,
+    sep = "\t", header = FALSE,
+    col.names = c("accession", "taxonomy")
+  ) |>
     dplyr::mutate(taxonomy = stringi::stri_replace_last_regex(taxonomy, ";$", "")),
   arr = arrow::read_tsv_arrow(taxonomy,
-                              col_names = c("accession", "taxonomy"),
-                              col_types = "cc") |>
+    col_names = c("accession", "taxonomy"),
+    col_types = "cc"
+  ) |>
     dplyr::mutate(taxonomy = stringi::stri_replace_last_regex(taxonomy, ";$", ""))
-
 )
diff --git a/inst/WORDLIST b/inst/WORDLIST
index 97e390b..02a3b30 100644
--- a/inst/WORDLIST
+++ b/inst/WORDLIST
@@ -1,16 +1,109 @@
+AEM
+al
+Appl
+archaeal
+aut
+autom
 Barnesiella
+Benchmarking
+BugReports
+bz
+CallEntries
+CallMethodDef
+cerr
 CMD
 Codecov
+compileAttributes
+Config
+const
+countSEXP
+countsSEXP
+cout
+cre
+DataFrame
+desc
+df
+DL
+dll
+DllInfo
+doi
+edu
+endif
+Environ
+et
+eukaryotic
+fasta
+FASTA
+FUNC
+Garrity
+github
 greengenes
+gz
+http
+https
+ifdef
+init
+JM
+kmer
 kmers
+knitr
+LazyData
+LazyDataCompression
+LinkingTo
+linter
+MERCHANTABILITY
+Microbiol
 microbiome
 mothur
+nolint
+NONINFRINGEMENT
 nt
+NumericMatrix
+NumericVector
+ORCID
+pds
 phylotypr
+PMC
+PMCID
+PMID
 Porphyromonadaceae
+priorsSEXP
+pschloss
+purrr
 QIIME
+Rcerr
+Rcout
+rcpp
+Rcpp
+RCPP
+RcppExport
 RDP
+rdp
 RDP's
+readr
+registerRoutines
+Rfast
 Ribosomal
 Riffomonas
+riffomonas
+rmarkdown
+rngScope
+RNGScope
+RObject
+Rostream
+ROSTREAM
+Roxygen
+RoxygenNote
 rRNA
+seqeunces
+SEXP
+sourceforge
+stringi
+sublicense
+testthat
+Tiedje
+trainset
+umich
+useDynamicSymbols
+VignetteBuilder
+xz