Logistic Regression R Script Tool Wrappings

The script tool used during the Live Training Seminar: Go Deeper with Data Analytics Using ArcGIS Pro and R
R-ArcGIS · Sep 5, 2017 · 02a4f5f · 02a4f5f
1 parent f911efe
commit 02a4f5f
Showing 1 changed file with 144 additions and 0 deletions.
diff --git a/scripts/Logistic_Regression_Wrappings.R b/scripts/Logistic_Regression_Wrappings.R
@@ -0,0 +1,144 @@
+#Live Training Seminar: Go Deeper with Data Analytics Using ArcGIS Pro and R
+#Logistic Regression R Script Wrappings
+tool_exec<- function(in_params, out_params){
+
+  #####################################################################################################  
+  ### Check/Load Required Packages  
+  #####################################################################################################   
+  arc.progress_label("Loading packages...")
+  arc.progress_pos(20)
+
+  if(!requireNamespace("MKmisc", quietly = TRUE))
+    install.packages("MKmisc", quiet = TRUE)
+  if(!requireNamespace("ROCR", quietly = TRUE))
+    install.packages("ROCR", quiet = TRUE)
+  if(!requireNamespace("survey", quietly = TRUE))
+    install.packages("survey", quiet = TRUE)
+  if(!requireNamespace("pROC", quietly = TRUE))
+    install.packages("pROC", quiet = TRUE)
+  if(!requireNamespace("ROCR", quietly = TRUE))
+    install.packages("ROCR", quiet = TRUE)
+  if(!requireNamespace("caret", quietly = TRUE))
+    install.packages("caret", quiet = TRUE)
+
+  require(MKmisc)
+  require(ROCR)
+  require(survey)
+  require(pROC)
+  require(ROCR)
+  require(caret)
+
+  ##################################################################################################### 
+  ### Define input/output parameters
+  #####################################################################################################
+  input_data <- in_params[[1]]
+  train_percentage_size <- (in_params[[2]])/100
+  dependent_variable <- in_params[[3]]
+  independent_variables <- in_params[[4]]
+
+  output_prediction_data <- out_params[[1]]
+
+  ##################################################################################################### 
+  ### Load Data and Create Dataframe R Object 
+  #####################################################################################################
+  arc.progress_label("Loading data...")
+  arc.progress_pos(40)
+
+  d <- arc.open(input_data)
+  fields_list <- append(c(dependent_variable), independent_variables)
+  d_df_full <- arc.select(d)
+  d_df <- arc.select(d, fields = fields_list)
+
+  #####################################################################################################
+  ### Create Training and Testing Datasets
+  #####################################################################################################
+  arc.progress_label("Creating training and testing datasets...")
+  arc.progress_pos(60)
+  smp_size <- floor(train_percentage_size * nrow(d_df))
+
+  set.seed(1234)
+  train_ind <- sample(seq_len(nrow(d_df)), size = smp_size)
+
+  d_df_train <- d_df[train_ind, ]
+  d_df_test <- d_df[-train_ind, ]
+
+  #####################################################################################################
+  ### Fit Logistic Regression Model
+  #####################################################################################################
+  arc.progress_label("Creating training and testing datasets...")
+  arc.progress_pos(80)
+
+  response <- d_df_train[, 1]
+  predictors <- d_df_train[, -1]
+
+  d_df_train.log <- glm(response ~ .,  family = binomial(link = 'logit'), data = predictors)
+
+  d_df_full$Seagrass_Prediction <- predict(d_df_train.log, newdata = d_df_full, type = "response")
+
+  #####################################################################################################
+  ### Run Diagnostics on Logistic Regression Model
+  #####################################################################################################
+  arc.progress_label("Running diagnostics on fitted model...")
+  arc.progress_pos(80)
+
+  #Summary of model fit
+  cat(paste0("\n", "............................................", "\n"))
+  cat(paste0("\n", "............................................", "\n"))
+  cat(paste0("\n"))
+  cat(paste0("\n", "Summary of Fitted Logistic Regression Model", "\n"))
+  cat(paste0("\n", "............................................", "\n"))
+  cat(paste0("\n"))
+  print(summary(d_df_train.log))
+
+  #Hosmer-Lemeshow Test
+  cat(paste0("\n", "............................................", "\n"))
+  cat(paste0("\n", "............................................", "\n"))
+  cat(paste0("\n"))
+  cat(paste0("\n", "Hosmer-Lemeshow Goodness of Fit Test Results", "\n"))
+  cat(paste0("\n", "............................................", "\n"))
+  cat(paste0("\n"))
+  HL <- HLgof.test(fit = fitted(d_df_train.log), obs = d_df_train$Seagrass)
+  print(HL)
+
+  #Prediction Accuracy 
+  d_df_test.log.pred <- predict(d_df_train.log, newdata = d_df_test, type = 'response')
+  d_df_test.log.pred <- ifelse(d_df_test.log.pred > 0.5, 1, 0)
+  misClassificError <- mean(d_df_test.log.pred != d_df_test$Seagrass)
+  cat(paste0("\n", "............................................", "\n"))
+  cat(paste0("\n", "............................................", "\n"))
+  cat(paste0("\n"))
+  cat(paste0("\n", "Prediction Accuracy for Test Dataset", "\n"))
+  cat(paste0("\n", "............................................", "\n"))
+  cat(paste0("\n"))
+  print(paste('Accuracy Percentage:', round((1-misClassificError)*100, 2)))
+  cat(paste0("\n"))
+
+  #ROC Curve
+  d_df_test.log.pred <- predict(d_df_train.log, newdata = d_df_test, type = 'response')
+  pred <- prediction(d_df_test.log.pred, d_df_test$Seagrass)
+  perf <- performance(pred, measure = "tpr", x.measure = "fpr")
+  plot(perf)
+  auc <- performance(pred, measure = "auc")
+  auc <- auc@y.values[[1]]
+  cat(paste0("\n", "............................................", "\n"))
+  cat(paste0("\n", "............................................", "\n"))
+  cat(paste0("\n"))
+  cat(paste0("\n", "Area Under the ROC Curve", "\n"))
+  cat(paste0("\n", "............................................", "\n"))
+  cat(paste0("\n"))
+  print(paste('Area:', auc))
+  cat(paste0("\n"))
+  cat(paste0("\n", "............................................", "\n"))
+  cat(paste0("\n"))
+
+  #####################################################################################################
+  ### Write Output
+  #####################################################################################################
+  arc.progress_label("Writing output...")
+  arc.progress_pos(80)
+
+  if(!is.null(output_prediction_data) && output_prediction_data != "NA")
+    arc.write(output_prediction_data, d_df_full, shape_info = arc.shapeinfo(d))
+
+  arc.progress_pos(100)
+}