[Don't merge] Draft PR for new model API #462

Draft: wants to merge 3 commits into develop
1 change: 1 addition & 0 deletions DESCRIPTION
@@ -39,6 +39,7 @@ Imports:
polspline,
pROC,
PRROC,
+ R6,
reticulate (>= 1.30),
rlang,
SqlRender (>= 1.1.3),
142 changes: 142 additions & 0 deletions R/HyperparameterOptimization.R
@@ -0,0 +1,142 @@
# auto-tuner: loops over hyperparameter candidates and records their performance
autoTuning <- function(
    trainData,
    self
    ){

  # the hyperparameter generator proposes candidates iteratively - grid/random/
  # mc sampling/genetic algorithm (a sketch of one generator follows this function)
  # TODO: think about parallelization

  summaryPerformance <- list()

  start <- TRUE
  repeat {

    hyperparameter <- self$hyperparameterGenerator$getNextHyperparameters(summaryPerformance)
self$setHyperparameters(hyperparameter)

performances <- list()
    iterationCount <- self$resamplingFunction$getIterationCount()
    for (i in seq_len(iterationCount)) {
      message(paste0('resample ', i, ' of ', iterationCount))
      # split trainData into train/validation - cv/bootstrap
      dataIndexes <- self$resamplingFunction$getIndexes(
        data = trainData,
        iteration = i
      ) # a list with trainRowIds and validationRowIds

      validationPrediction <- self$fit(
        data = trainData,
        trainIndex = dataIndexes$trainRowIds,
        validationIndex = dataIndexes$validationRowIds,
        returnPredictionOnly = TRUE
      )
      # user-specified performance metric: takes a prediction object and returns a performance value
      performanceTemp <- self$performanceFunction$metricFunction(validationPrediction)
      performances[[length(performances) + 1]] <- performanceTemp

}

message('Aggregating performance')
aggregatePerformanceIteration <- self$performanceFunction$aggregateFunction(performances)
message('aggregate performance: ', aggregatePerformanceIteration)

summaryPerformance[[length(summaryPerformance) + 1]] <- list(
hyperparameter = hyperparameter,
performances = performances,
aggregatePerformance = aggregatePerformanceIteration
)

    if (start) {
      start <- FALSE
      message('Setting initial currentOptimal')
      currentOptimal <- aggregatePerformanceIteration
      optimalHyperparameters <- hyperparameter
    }

    # keep the best hyperparameters seen so far, maximizing or minimizing the metric
    if (self$performanceFunction$maximize) {
      if (currentOptimal < aggregatePerformanceIteration) {
        message('New maximum')
        optimalHyperparameters <- hyperparameter
        currentOptimal <- aggregatePerformanceIteration
      }
    } else {
      if (currentOptimal > aggregatePerformanceIteration) {
        message('New minimum')
        optimalHyperparameters <- hyperparameter
        currentOptimal <- aggregatePerformanceIteration
      }
    }

    if (self$hyperparameterGenerator$converged) {
      break
    }
  }

  # store the chosen hyperparameters and the full search summary on the model object
  self$hyperparametersFinal <- optimalHyperparameters
  self$hyperparameterSummary <- summaryPerformance

  invisible(self)
}
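
# --- Sketch (not part of this PR): one possible hyperparameter generator ---
# autoTuning() above assumes an object exposing getNextHyperparameters(summaryPerformance)
# and a `converged` flag. This hypothetical random-search generator illustrates that
# interface; the class name, fields, and defaults are made up for illustration.
RandomSearchGenerator <- R6::R6Class(
  classname = "RandomSearchGenerator",
  public = list(
    paramSampler = NULL, # function() returning a named list of hyperparameters
    maxIterations = NULL,
    converged = FALSE,
    iteration = 0,
    initialize = function(paramSampler, maxIterations = 10) {
      self$paramSampler <- paramSampler
      self$maxIterations <- maxIterations
    },
    getNextHyperparameters = function(summaryPerformance) {
      # random search ignores past performance; a smarter generator could use it
      self$iteration <- self$iteration + 1
      if (self$iteration >= self$maxIterations) {
        self$converged <- TRUE
      }
      self$paramSampler()
    }
  )
)

# The performanceFunction assumed by autoTuning() could be as simple as a list
# holding a metric, an aggregator, and a direction flag (values hypothetical):
examplePerformanceFunction <- list(
  metricFunction = function(prediction) {
    # hypothetical metric: mean absolute error on the validation prediction
    mean(abs(prediction$outcomeCount - prediction$value))
  },
  aggregateFunction = function(performances) mean(unlist(performances)),
  maximize = FALSE
)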

LOOCV <- function(
    trainData,
    optimizationSettings, # seed, performanceFunction, etc.
    hyperparameterSettings, # list of hyperparameter candidates plus selection function
    modelSettings,
    analysisId,
    analysisPath
    ){

  # look up the fitting function by name
  fun <- eval(parse(text = modelSettings$fitFunction))

  performanceVals <- list()
  for (hyperparameters in hyperparameterSettings$hyperparameterList) {

    # one performance value per left-out row (leave-one-out cross-validation)
    performanceVal <- rep(0, nrow(trainData))
    for (i in seq_len(nrow(trainData))) {
args <- list(
trainData = trainData[-i,],
valData = trainData[i,],
hyperparameters = hyperparameters,
seed = optimizationSettings$seed
)
prediction <- do.call(fun, args) # prediction object
performanceVal[i] <- optimizationSettings$performanceFunction(prediction)
}

performanceVals[[length(performanceVals) + 1]] <- performanceVal
}

  performanceValsMeans <- vapply(performanceVals, mean, numeric(1))

  # index of the best candidate according to the user-supplied selection function
  optimalInd <- hyperparameterSettings$optimalFunction(performanceValsMeans)

  hyperparameterResults <- list(
    optimalHyperparameters = hyperparameterSettings$hyperparameterList[[optimalInd]],
    summaryPerformance = createSummaryPerformanceDataFrame(
      hyperparameterSettings$hyperparameterList,
      performanceVals,
      performanceValsMeans
    )
  )

  return(hyperparameterResults)
}
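
# --- Sketch (not part of this PR): example inputs for LOOCV() ---
# Illustrates the settings objects the function above assumes. The metric,
# hyperparameter values, and use of which.min are hypothetical.
exampleOptimizationSettings <- list(
  seed = 42,
  performanceFunction = function(prediction) {
    # hypothetical metric: squared error of the single held-out prediction
    mean((prediction$outcomeCount - prediction$value)^2)
  }
)
exampleHyperparameterSettings <- list(
  hyperparameterList = list(
    list(alpha = 0.1),
    list(alpha = 1)
  ),
  optimalFunction = which.min # index of the best (here: lowest) mean performance
)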


# add simple train/val split
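
# --- Sketch (not part of this PR): a hypothetical simple train/val split ---
# One shape the resamplingFunction assumed by autoTuning() could take:
# getIterationCount() and getIndexes(data, iteration) returning row ids.
TrainValSplit <- R6::R6Class(
  classname = "TrainValSplit",
  public = list(
    validationFraction = NULL,
    seed = NULL,
    initialize = function(validationFraction = 0.25, seed = 42) {
      self$validationFraction <- validationFraction
      self$seed <- seed
    },
    getIterationCount = function() {
      1L # a single split, unlike k-fold cross-validation
    },
    getIndexes = function(data, iteration) {
      set.seed(self$seed)
      n <- nrow(data)
      validationRowIds <- sample(n, size = round(n * self$validationFraction))
      list(
        trainRowIds = setdiff(seq_len(n), validationRowIds),
        validationRowIds = validationRowIds
      )
    }
  )
)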


# add genetic algorithm optimization