Skip to content

Commit

Permalink
Logistic Regression R Script Tool Wrappings
Browse files Browse the repository at this point in the history
The script tool used during the Live Training Seminar: Go Deeper with Data Analytics Using ArcGIS Pro and R
  • Loading branch information
marpobuda authored Sep 5, 2017
1 parent f911efe commit 02a4f5f
Showing 1 changed file with 144 additions and 0 deletions.
144 changes: 144 additions & 0 deletions scripts/Logistic_Regression_Wrappings.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
#Live Training Seminar: Go Deeper with Data Analytics Using ArcGIS Pro and R
#Logistic Regression R Script Wrappings
tool_exec<- function(in_params, out_params){

#####################################################################################################
### Check/Load Required Packages
#####################################################################################################
arc.progress_label("Loading packages...")
arc.progress_pos(20)

if(!requireNamespace("MKmisc", quietly = TRUE))
install.packages("MKmisc", quiet = TRUE)
if(!requireNamespace("ROCR", quietly = TRUE))
install.packages("ROCR", quiet = TRUE)
if(!requireNamespace("survey", quietly = TRUE))
install.packages("survey", quiet = TRUE)
if(!requireNamespace("pROC", quietly = TRUE))
install.packages("pROC", quiet = TRUE)
if(!requireNamespace("ROCR", quietly = TRUE))
install.packages("ROCR", quiet = TRUE)
if(!requireNamespace("caret", quietly = TRUE))
install.packages("caret", quiet = TRUE)

require(MKmisc)
require(ROCR)
require(survey)
require(pROC)
require(ROCR)
require(caret)

#####################################################################################################
### Define input/output parameters
#####################################################################################################
input_data <- in_params[[1]]
train_percentage_size <- (in_params[[2]])/100
dependent_variable <- in_params[[3]]
independent_variables <- in_params[[4]]

output_prediction_data <- out_params[[1]]

#####################################################################################################
### Load Data and Create Dataframe R Object
#####################################################################################################
arc.progress_label("Loading data...")
arc.progress_pos(40)

d <- arc.open(input_data)
fields_list <- append(c(dependent_variable), independent_variables)
d_df_full <- arc.select(d)
d_df <- arc.select(d, fields = fields_list)

#####################################################################################################
### Create Training and Testing Datasets
#####################################################################################################
arc.progress_label("Creating training and testing datasets...")
arc.progress_pos(60)
smp_size <- floor(train_percentage_size * nrow(d_df))

set.seed(1234)
train_ind <- sample(seq_len(nrow(d_df)), size = smp_size)

d_df_train <- d_df[train_ind, ]
d_df_test <- d_df[-train_ind, ]

#####################################################################################################
### Fit Logistic Regression Model
#####################################################################################################
arc.progress_label("Creating training and testing datasets...")
arc.progress_pos(80)

response <- d_df_train[, 1]
predictors <- d_df_train[, -1]

d_df_train.log <- glm(response ~ ., family = binomial(link = 'logit'), data = predictors)

d_df_full$Seagrass_Prediction <- predict(d_df_train.log, newdata = d_df_full, type = "response")

#####################################################################################################
### Run Diagnostics on Logistic Regression Model
#####################################################################################################
arc.progress_label("Running diagnostics on fitted model...")
arc.progress_pos(80)

#Summary of model fit
cat(paste0("\n", "............................................", "\n"))
cat(paste0("\n", "............................................", "\n"))
cat(paste0("\n"))
cat(paste0("\n", "Summary of Fitted Logistic Regression Model", "\n"))
cat(paste0("\n", "............................................", "\n"))
cat(paste0("\n"))
print(summary(d_df_train.log))

#Hosmer-Lemeshow Test
cat(paste0("\n", "............................................", "\n"))
cat(paste0("\n", "............................................", "\n"))
cat(paste0("\n"))
cat(paste0("\n", "Hosmer-Lemeshow Goodness of Fit Test Results", "\n"))
cat(paste0("\n", "............................................", "\n"))
cat(paste0("\n"))
HL <- HLgof.test(fit = fitted(d_df_train.log), obs = d_df_train$Seagrass)
print(HL)

#Prediction Accuracy
d_df_test.log.pred <- predict(d_df_train.log, newdata = d_df_test, type = 'response')
d_df_test.log.pred <- ifelse(d_df_test.log.pred > 0.5, 1, 0)
misClassificError <- mean(d_df_test.log.pred != d_df_test$Seagrass)
cat(paste0("\n", "............................................", "\n"))
cat(paste0("\n", "............................................", "\n"))
cat(paste0("\n"))
cat(paste0("\n", "Prediction Accuracy for Test Dataset", "\n"))
cat(paste0("\n", "............................................", "\n"))
cat(paste0("\n"))
print(paste('Accuracy Percentage:', round((1-misClassificError)*100, 2)))
cat(paste0("\n"))

#ROC Curve
d_df_test.log.pred <- predict(d_df_train.log, newdata = d_df_test, type = 'response')
pred <- prediction(d_df_test.log.pred, d_df_test$Seagrass)
perf <- performance(pred, measure = "tpr", x.measure = "fpr")
plot(perf)
auc <- performance(pred, measure = "auc")
auc <- auc@y.values[[1]]
cat(paste0("\n", "............................................", "\n"))
cat(paste0("\n", "............................................", "\n"))
cat(paste0("\n"))
cat(paste0("\n", "Area Under the ROC Curve", "\n"))
cat(paste0("\n", "............................................", "\n"))
cat(paste0("\n"))
print(paste('Area:', auc))
cat(paste0("\n"))
cat(paste0("\n", "............................................", "\n"))
cat(paste0("\n"))

#####################################################################################################
### Write Output
#####################################################################################################
arc.progress_label("Writing output...")
arc.progress_pos(80)

if(!is.null(output_prediction_data) && output_prediction_data != "NA")
arc.write(output_prediction_data, d_df_full, shape_info = arc.shapeinfo(d))

arc.progress_pos(100)
}

0 comments on commit 02a4f5f

Please sign in to comment.