Merge branch 'master' of https://github.com/BIGslu/kimma

BIGslu · Feb 10, 2022 · 73b4779 · 73b4779
2 parents dee408f + 00a309a
commit 73b4779
Show file tree

Hide file tree

Showing 7 changed files with 103 additions and 30 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -10,6 +10,7 @@ Encoding: UTF-8
 LazyData: true
 biocViews: 
 Imports: 
+    AICcmodavg,
     broom,
     car,
     coxme,

diff --git a/R/kimma_lm.R b/R/kimma_lm.R
@@ -23,19 +23,29 @@ kimma_lm <- function(model.lm, to.model.gene, gene, use.weights){
     }
 
     p.lm <- broom::tidy(fit.lm)
-    sigma.lm <- stats::sigma(fit.lm)
 
     #Extract results
     results.lm <- data.frame(
       model = rep("lm", nrow(p.lm)),    #Label model as lm
       gene = rep(gene, nrow(p.lm)),     #gene name
       variable = p.lm$term,             #variables in model
       estimate = p.lm$estimate,         #estimates in model
-      pval = p.lm$p.value,              #P-value
-      sigma = rep(sigma.lm, nrow(p.lm)))#sigma
+      pval = p.lm$p.value)              #P-value
+
+    #Model fit metrics
+    fit.metrics <- data.frame(
+      model="lm.fit",
+      gene=gene,
+      sigma = stats::sigma(fit.lm),
+      AIC = stats::AIC(fit.lm),
+      BIC = stats::BIC(fit.lm),
+      Rsq = summary(fit.lm)$r.squared,
+      adj_Rsq = summary(fit.lm)$adj.r.squared
+    )
 
     results.lm.ls <- list()
     results.lm.ls[["fit"]] <- fit.lm
+    results.lm.ls[["metrics"]] <- fit.metrics
     results.lm.ls[["results"]] <- results.lm
     return(results.lm.ls)
 }
diff --git a/R/kimma_lme.R b/R/kimma_lme.R
@@ -40,20 +40,45 @@ kimma_lme <- function(model.lme, to.model.gene, gene, use.weights){
         gene = gene,                        #gene name
         variable = p.lme$term,              #variables in model
         estimate = est.lme$Estimate,        #estimate in model
-        pval = p.lme$p.value,               #P-value
-        sigma = sigma.lme)                 #sigma
+        pval = p.lme$p.value)               #P-value
     } else {
       #If 3+ variable
       results.lme <- data.frame(
         model = "lme",                      #Label model as lme
         gene = gene,                        #gene name
         variable = p.lme$term,              #variables in model
         estimate = "seeContrasts",          #estimate in model
-        pval = p.lme$p.value,               #P-value
-        sigma = sigma.lme)                  #sigma
+        pval = p.lme$p.value)               #P-value
     }
+
+    #Calculate R-squared
+    if(use.weights){
+      null <- stats::glm(formula = expression ~ 1, data = to.model.gene,
+                  weights = to.model.gene$weight)
+    } else{
+      null <- stats::glm(formula = expression ~ 1, data = to.model.gene)
+    }
+
+    L0 <- as.vector(stats::logLik(null))
+    L1 <- as.vector(stats::logLik(fit.lme))
+    n <- stats::nobs(fit.lme)
+    ret <- 1 - exp(-2 / n * (L1 - L0))
+    max.r2 <- 1 - exp(2 / n * L0)
+
+    #Model fit metrics
+    fit.metrics <- data.frame(
+      model="lme.fit",
+      gene=gene,
+      sigma = stats::sigma(fit.lme),
+      AIC = stats::AIC(fit.lme),
+      BIC = stats::BIC(fit.lme),
+      Rsq = ret,
+      adj_Rsq = ret / max.r2
+    )
+
     results.lme.ls <- list()
     results.lme.ls[["fit"]] <- fit.lme
+    results.lme.ls[["metrics"]] <- fit.metrics
     results.lme.ls[["results"]] <- results.lme
     return(results.lme.ls)
 

diff --git a/R/kimma_lmekin.R b/R/kimma_lmekin.R
@@ -27,27 +27,50 @@ kimma_lmekin <- function(model.lme, to.model.gene, gene, kin.subset, use.weights
                                weights=NULL)
     }
 
-
     #Calculate stats
     beta <- fit.kin$coefficients$fixed
     nvar <- length(beta)
     nfrail <- nrow(fit.kin$var) - nvar
     se <- sqrt(diag(fit.kin$var)[nfrail + 1:nvar])
     t <- beta/se
     p.kin <- stats::pchisq((t)^2, 1, lower.tail=FALSE)
-    sigma.kin <- fit.kin$sigma
 
     #Extract results
     results.kin <- data.frame(
       model = rep("lmekin", length(p.kin)),  #Label model as lmekin
       gene = rep(gene, length(p.kin)),       #gene name
       variable = names(p.kin),               #variables in model
       estimate = beta,                       #estimates in model
-      pval = p.kin,                          #P-value
-      sigma = rep(sigma.kin, length(p.kin))) #sigma
+      pval = p.kin)                          #P-value
+
+    #Calculate R-squared
+    if(use.weights){
+      null <- stats::glm(formula = expression ~ 1, data = to.model.gene,
+                  weights = to.model.gene$weight)
+    } else{
+      null <- stats::glm(formula = expression ~ 1, data = to.model.gene)
+    }
+
+    L0 <- as.vector(stats::logLik(null))
+    L1 <- as.vector(fit.kin$loglik)
+    n <- nrow(to.model.gene)
+    ret <- 1 - exp(-2 / n * (L1 - L0))
+    max.r2 <- 1 - exp(2 / n * L0)
+
+    #Model fit metrics
+    fit.metrics <- data.frame(
+      model="lmekin.fit",
+      gene=gene,
+      sigma = fit.kin$sigma,
+      AIC = AICcmodavg::AICc(fit.kin, second.ord=FALSE),
+      BIC = AICcmodavg::useBIC(fit.kin),
+      Rsq = ret,
+      adj_Rsq = ret / max.r2
+    )
 
     results.kin.ls <- list()
     results.kin.ls[["fit"]] <- fit.kin
+    results.kin.ls[["metrics"]] <- fit.metrics
     results.kin.ls[["results"]] <- results.kin
     return(results.kin.ls)
   }
diff --git a/R/kmFit_contrast_kin.R b/R/kmFit_contrast_kin.R
@@ -5,61 +5,69 @@
 #' @param patientID Character of variable name to match dat$targets to kinship row and column names.
 #' @param to.model.ls Formatted data from kimma_cleaning( )
 #' @param gene Character name of gene of interest
+#' @param use.weights Logical if gene specific weights should be used in model
 #'
 #' @return data frame with contrast model results
 #' @keywords internal
 
-kmFit_contrast_kin <- function(contrast.var, to.model.gene, patientID, to.model.ls, gene, use.weights){
-  contrast.kin <- contrast.i <- V1 <- V2 <- variable <- contrast <- NULL
+kmFit_contrast_kin <- function(contrast.var, to.model.gene, patientID,
+                               to.model.ls, gene, use.weights){
+  contrast.kin <- contrast.i <- contrast.i.format <- V1 <- V2 <- variable <- contrast <- NULL
+  to.model.contrast <- to.model.gene
+
   #emmeans does not work for lmekin object. Instead, run pairwise contrasts as subsets
   for(contrast.i in contrast.var){
     #Class
     i.split <- strsplit(contrast.i, split=":")[[1]]
-    contrast.isnt.numeric <- !(unlist(lapply(to.model.gene[,i.split], is.numeric)))
+    contrast.isnt.numeric <- !(unlist(lapply(to.model.contrast[,i.split], is.numeric)))
 
     #only run on categorical variable
     if(any(contrast.isnt.numeric)){
       #non-interaction variables
       if(!grepl(":", contrast.i)){
         model.contrast <- paste("expression~",contrast.i, "+(1|", patientID, ")", sep="")
+        to.model.contrast.format <- to.model.contrast
+        contrast.i.format <- contrast.i
       } else{
         model.contrast <- paste("expression~", i.split[1], "_", i.split[2],
                                 "+(1|", patientID, ")", sep="")
         #Create variable in data
-        to.model.gene[,paste0(i.split[1], "_", i.split[2])] <-
-          paste(unlist(to.model.gene[,i.split[1]]),
-                unlist(to.model.gene[,i.split[2]]), sep="_")
-        contrast.i <- gsub(":","_",contrast.i)
+        to.model.contrast.format <- to.model.contrast
+        to.model.contrast.format[,paste0(i.split[1], "_", i.split[2])] <-
+          paste(unlist(to.model.contrast[,i.split[1]]),
+                unlist(to.model.contrast[,i.split[2]]), sep="_")
+        contrast.i.format <- gsub(":","_",contrast.i)
       }
 
         #list all possible combos
-        levels.OI <- as.character(unlist(unique(to.model.gene[,contrast.i])))
+        levels.OI <- as.character(unlist(unique(to.model.contrast.format[,contrast.i.format])))
         contrast.combo <- as.data.frame(t(utils::combn(levels.OI, m=2))) %>%
-          dplyr::mutate(combo = paste(contrast.i, V2, " - ",contrast.i, V1, sep=""))
+          dplyr::mutate(combo = paste(contrast.i.format, V2, " - ",contrast.i.format, V1, sep=""))
 
         if(nrow(contrast.combo)>0){
           for(row.i in 1:nrow(contrast.combo)){
+            to.model.current <- to.model.contrast.format
             contrast.kin.temp <- NULL
             #filter data to each pairwise comparison of interest
-            to.model.gene.contrast <- dplyr::filter(to.model.gene,
-                                      get(contrast.i) %in% unlist(contrast.combo[row.i,]))
+            to.model.contrast.gene <- dplyr::filter(to.model.current,
+                                      get(contrast.i.format) %in% unlist(contrast.combo[row.i,]))
 
-            to.keep <- unique(unlist(to.model.gene.contrast[,patientID]))
+            to.keep <- unique(unlist(to.model.current[,patientID]))
             kin.contrast <- to.model.ls[["kin.subset"]][rownames(to.model.ls[["kin.subset"]]) %in%
                                                           to.keep, to.keep]
 
-            contrast.kin.temp <- tryCatch({
-              kimma_lmekin(model.contrast, to.model.gene.contrast, gene, kin.contrast, use.weights)
-            }, error=function(e){})
+            contrast.kin.temp <- kimma_lmekin(model.contrast, to.model.contrast.gene,
+                                              gene, kin.contrast, use.weights)
 
             if(!is.null(contrast.kin.temp)){
             contrast.kin <- contrast.kin.temp[["results"]] %>%
               dplyr::filter(variable != "(Intercept)") %>%
-              dplyr::mutate(variable = contrast.i,
+              dplyr::mutate(variable = contrast.i.format,
                             contrast = contrast.combo[row.i, "combo"],
                             model = "lmekin.contrast") %>%
-              dplyr::mutate(contrast = gsub(contrast.i,"",contrast)) %>%
-              dplyr::bind_rows(contrast.kin)}}
+              dplyr::mutate(contrast = gsub(contrast.i.format,"",contrast)) %>%
+              dplyr::bind_rows(contrast.kin)
+            }}
           }
     }
   }

diff --git a/man/kmFit.Rd b/man/kmFit.Rd
diff --git a/man/kmFit_contrast_kin.Rd b/man/kmFit_contrast_kin.Rd