### supplemental material to:
### Michael Kloster, Daniel Langenkämper, Martin Zurowietz, Bánk Beszteri, Tim W. Nattkemper (2020) 
### Deep learning-based diatom taxonomy on virtual slides


### please execute scripts 01 - 03 prior to this one!!!


### please note: subfolder "images" must be present with the respective image data
 

### DL experiments 

# Load dependencies with library(): it stops right here if a package is missing,
# whereas require() only warns and returns FALSE, deferring the failure to the
# first training call.
library(keras)
library(caret)
library(e1071)

# general DL settings
Nepochs <- 50 # number of epochs to train
BatchSizeLarge <- 32 # batch size for training on the 100% data sets
BatchSizeSmall <- 8 # batch size for training on the 10% data subsets
RandomSeed <- 123 # initial value for random generator
NRunsLarge <- 3 # number of replicates for training on the 100% data sets
NRunsSmall <- 5 # number of replicates for training on the 10% data subsets



###########################################
#                                         #
# define here the CNN architecture to use #
# for the experiments                     #
#                                         #
###########################################
# Any of the models defined in ModelsList in script "03-CNN functions.R".
# `[[ ]]` is used instead of `$` so that the name has to match exactly
# (`$` silently accepts unambiguous partial names).
experiment.model <- ModelsList[["VGG16_1FC"]]
###########################################

# A misspelled or missing model name yields NULL instead of an error. Stop here,
# before the result log is reset below, rather than failing somewhere inside
# the first training run.
if (is.null(experiment.model)) {
  stop("experiment.model is NULL - the selected entry was not found in ModelsList", call. = FALSE)
}

# init result logging
logResults(init = TRUE) # by this, the logging will be restarted and possibly already existing result files will be overwritten

### Experiments


## Initial experiment 0: baseline (A+B on A+B), 100% data, unmasked background
# Split data sets A and B separately, using the same proportions and seed.
experiment.data.A <- splitDLData(
  data.A.100p.unmasked,
  portionTrain = 0.72,
  portionValidate = 0.18,
  seed = RandomSeed
)
experiment.data.B <- splitDLData(
  data.B.100p.unmasked,
  portionTrain = 0.72,
  portionValidate = 0.18,
  seed = RandomSeed
)
# Pool the matching partitions of A and B row-wise.
experiment.data <- list(
  train = rbind(experiment.data.A$train, experiment.data.B$train),
  validate = rbind(experiment.data.A$validate, experiment.data.B$validate),
  test = rbind(experiment.data.A$test, experiment.data.B$test)
)
# Class definitions are taken from data set A only
# (assumes A and B share the same classes - TODO confirm).
experiment.classes <- data.A.100p.unmasked.Classes
experiment.batchsize <- BatchSizeLarge
experiment.nRuns <- 1 # a single run for this baseline
experiment.name <- paste0(experiment.model, ".Exp00.ABonAB.unmasked")

# Train and evaluate.
experiment.evaluation <- trainModelnTimes(
  projectName = experiment.name,
  nRuns = experiment.nRuns,
  modelType = experiment.model,
  dataTraining = experiment.data$train,
  dataValidation = experiment.data$validate,
  dataTest = experiment.data$test,
  nEpochs = Nepochs,
  batchSize = experiment.batchsize,
  classes = experiment.classes,
  seed = RandomSeed
)

# Append the evaluation to the result log.
logResults(model = experiment.model, evaluationData = experiment.evaluation)


## the 16 experiments as defined in Table 4

## Exp. 1: train on A, test on A; 100% data; unmasked background
# Configuration: the whole data set A is handed to the k-fold routine.
experiment.data <- data.A.100p.unmasked
experiment.classes <- data.A.100p.unmasked.Classes
experiment.batchsize <- BatchSizeLarge
experiment.name <- paste0(experiment.model, ".Exp01.AonA.100p.unmasked")

# Train and evaluate with k = 4 (trainModelkfold comes from the prerequisite scripts).
experiment.evaluation.kfold <- trainModelkfold(
  projectName = experiment.name,
  modelType = experiment.model,
  data = experiment.data,
  k = 4,
  portionTrain = 0.8,
  nEpochs = Nepochs,
  batchSize = experiment.batchsize,
  classes = experiment.classes,
  seed = RandomSeed
)

# Append the evaluation to the result log.
logResults(model = experiment.model, evaluationData = experiment.evaluation.kfold)


## Exp. 2: train on B, test on B; 100% data; unmasked background
# Configuration: the whole data set B is handed to the k-fold routine.
experiment.data <- data.B.100p.unmasked
experiment.classes <- data.B.100p.unmasked.Classes
experiment.batchsize <- BatchSizeLarge
experiment.name <- paste0(experiment.model, ".Exp02.BonB.100p.unmasked")

# Train and evaluate with k = 4 (trainModelkfold comes from the prerequisite scripts).
experiment.evaluation.kfold <- trainModelkfold(
  projectName = experiment.name,
  modelType = experiment.model,
  data = experiment.data,
  k = 4,
  portionTrain = 0.8,
  nEpochs = Nepochs,
  batchSize = experiment.batchsize,
  classes = experiment.classes,
  seed = RandomSeed
)

# Append the evaluation to the result log.
logResults(model = experiment.model, evaluationData = experiment.evaluation.kfold)


## Exp. 3: train on A, test on A; 100% data; masked background
# Configuration: the whole masked data set A is handed to the k-fold routine.
experiment.data <- data.A.100p.masked
experiment.classes <- data.A.100p.masked.Classes
experiment.batchsize <- BatchSizeLarge
experiment.name <- paste0(experiment.model, ".Exp03.AonA.100p.masked")

# Train and evaluate with k = 4 (trainModelkfold comes from the prerequisite scripts).
experiment.evaluation.kfold <- trainModelkfold(
  projectName = experiment.name,
  modelType = experiment.model,
  data = experiment.data,
  k = 4,
  portionTrain = 0.8,
  nEpochs = Nepochs,
  batchSize = experiment.batchsize,
  classes = experiment.classes,
  seed = RandomSeed
)

# Append the evaluation to the result log.
logResults(model = experiment.model, evaluationData = experiment.evaluation.kfold)


## Exp. 4: train on B, test on B; 100% data; masked background
# Configuration: the whole masked data set B is handed to the k-fold routine.
experiment.data <- data.B.100p.masked
experiment.classes <- data.B.100p.masked.Classes
experiment.batchsize <- BatchSizeLarge
experiment.name <- paste0(experiment.model, ".Exp04.BonB.100p.masked")

# Train and evaluate with k = 4 (trainModelkfold comes from the prerequisite scripts).
experiment.evaluation.kfold <- trainModelkfold(
  projectName = experiment.name,
  modelType = experiment.model,
  data = experiment.data,
  k = 4,
  portionTrain = 0.8,
  nEpochs = Nepochs,
  batchSize = experiment.batchsize,
  classes = experiment.classes,
  seed = RandomSeed
)

# Append the evaluation to the result log.
logResults(model = experiment.model, evaluationData = experiment.evaluation.kfold)

## Exp. 5: train on A, test on A; 10% subset; unmasked background
# Configuration: the 10% subset of A is used together with the small batch size.
experiment.data <- data.A.10p.unmasked
experiment.classes <- data.A.10p.unmasked.Classes
experiment.batchsize <- BatchSizeSmall
experiment.name <- paste0(experiment.model, ".Exp05.AonA.10p.unmasked")

# Train and evaluate with k = 4 (trainModelkfold comes from the prerequisite scripts).
experiment.evaluation.kfold <- trainModelkfold(
  projectName = experiment.name,
  modelType = experiment.model,
  data = experiment.data,
  k = 4,
  portionTrain = 0.8,
  nEpochs = Nepochs,
  batchSize = experiment.batchsize,
  classes = experiment.classes,
  seed = RandomSeed
)

# Append the evaluation to the result log.
logResults(model = experiment.model, evaluationData = experiment.evaluation.kfold)


## Exp. 6: train on B, test on B; 10% subset; unmasked background
# Configuration: the 10% subset of B is used together with the small batch size.
experiment.data <- data.B.10p.unmasked
experiment.classes <- data.B.10p.unmasked.Classes
experiment.batchsize <- BatchSizeSmall
experiment.name <- paste0(experiment.model, ".Exp06.BonB.10p.unmasked")

# Train and evaluate with k = 4 (trainModelkfold comes from the prerequisite scripts).
experiment.evaluation.kfold <- trainModelkfold(
  projectName = experiment.name,
  modelType = experiment.model,
  data = experiment.data,
  k = 4,
  portionTrain = 0.8,
  nEpochs = Nepochs,
  batchSize = experiment.batchsize,
  classes = experiment.classes,
  seed = RandomSeed
)

# Append the evaluation to the result log.
logResults(model = experiment.model, evaluationData = experiment.evaluation.kfold)


## Exp. 7: train on A, test on A; 10% subset; masked background
# Configuration: the masked 10% subset of A is used together with the small batch size.
experiment.data <- data.A.10p.masked
experiment.classes <- data.A.10p.masked.Classes
experiment.batchsize <- BatchSizeSmall
experiment.name <- paste0(experiment.model, ".Exp07.AonA.10p.masked")

# Train and evaluate with k = 4 (trainModelkfold comes from the prerequisite scripts).
experiment.evaluation.kfold <- trainModelkfold(
  projectName = experiment.name,
  modelType = experiment.model,
  data = experiment.data,
  k = 4,
  portionTrain = 0.8,
  nEpochs = Nepochs,
  batchSize = experiment.batchsize,
  classes = experiment.classes,
  seed = RandomSeed
)

# Append the evaluation to the result log.
logResults(model = experiment.model, evaluationData = experiment.evaluation.kfold)


## Exp. 8: train on B, test on B; 10% subset; masked background
# Configuration: the masked 10% subset of B is used together with the small batch size.
experiment.data <- data.B.10p.masked
experiment.classes <- data.B.10p.masked.Classes
experiment.batchsize <- BatchSizeSmall
experiment.name <- paste0(experiment.model, ".Exp08.BonB.10p.masked")

# Train and evaluate with k = 4 (trainModelkfold comes from the prerequisite scripts).
experiment.evaluation.kfold <- trainModelkfold(
  projectName = experiment.name,
  modelType = experiment.model,
  data = experiment.data,
  k = 4,
  portionTrain = 0.8,
  nEpochs = Nepochs,
  batchSize = experiment.batchsize,
  classes = experiment.classes,
  seed = RandomSeed
)

# Append the evaluation to the result log.
logResults(model = experiment.model, evaluationData = experiment.evaluation.kfold)


## Exp. 9: train on A, test on B; 100% data; unmasked background
# Run parameters for the full-size data sets.
experiment.nRuns <- NRunsLarge
experiment.batchsize <- BatchSizeLarge
experiment.classes <- data.A.100p.unmasked.Classes
# Training/validation partitions come from splitting A; the `test` element is
# then set to the complete data set B (overwriting whatever the split put there).
experiment.data <- splitDLData(
  data.A.100p.unmasked,
  portionTrain = 0.8,
  seed = RandomSeed
)
experiment.data$test <- data.B.100p.unmasked
experiment.name <- paste0(experiment.model, ".Exp09.AonB.100p.unmasked")

# Train and evaluate (repeated experiment.nRuns times).
experiment.evaluation <- trainModelnTimes(
  projectName = experiment.name,
  nRuns = experiment.nRuns,
  modelType = experiment.model,
  dataTraining = experiment.data$train,
  dataValidation = experiment.data$validate,
  dataTest = experiment.data$test,
  nEpochs = Nepochs,
  batchSize = experiment.batchsize,
  classes = experiment.classes,
  seed = RandomSeed
)

# Append the evaluation to the result log.
logResults(model = experiment.model, evaluationData = experiment.evaluation)


## Exp. 10: train on B, test on A; 100% data; unmasked background
# Run parameters for the full-size data sets.
experiment.nRuns <- NRunsLarge
experiment.batchsize <- BatchSizeLarge
experiment.classes <- data.B.100p.unmasked.Classes
# Training/validation partitions come from splitting B; the `test` element is
# then set to the complete data set A (overwriting whatever the split put there).
experiment.data <- splitDLData(
  data.B.100p.unmasked,
  portionTrain = 0.8,
  seed = RandomSeed
)
experiment.data$test <- data.A.100p.unmasked
experiment.name <- paste0(experiment.model, ".Exp10.BonA.100p.unmasked")

# Train and evaluate (repeated experiment.nRuns times).
experiment.evaluation <- trainModelnTimes(
  projectName = experiment.name,
  nRuns = experiment.nRuns,
  modelType = experiment.model,
  dataTraining = experiment.data$train,
  dataValidation = experiment.data$validate,
  dataTest = experiment.data$test,
  nEpochs = Nepochs,
  batchSize = experiment.batchsize,
  classes = experiment.classes,
  seed = RandomSeed
)

# Append the evaluation to the result log.
logResults(model = experiment.model, evaluationData = experiment.evaluation)


## Exp. 11: train on A, test on B; 100% data; masked background
# Run parameters for the full-size data sets.
experiment.nRuns <- NRunsLarge
experiment.batchsize <- BatchSizeLarge
experiment.classes <- data.A.100p.masked.Classes
# Training/validation partitions come from splitting A; the `test` element is
# then set to the complete data set B (overwriting whatever the split put there).
experiment.data <- splitDLData(
  data.A.100p.masked,
  portionTrain = 0.8,
  seed = RandomSeed
)
experiment.data$test <- data.B.100p.masked
experiment.name <- paste0(experiment.model, ".Exp11.AonB.100p.masked")

# Train and evaluate (repeated experiment.nRuns times).
experiment.evaluation <- trainModelnTimes(
  projectName = experiment.name,
  nRuns = experiment.nRuns,
  modelType = experiment.model,
  dataTraining = experiment.data$train,
  dataValidation = experiment.data$validate,
  dataTest = experiment.data$test,
  nEpochs = Nepochs,
  batchSize = experiment.batchsize,
  classes = experiment.classes,
  seed = RandomSeed
)

# Append the evaluation to the result log.
logResults(model = experiment.model, evaluationData = experiment.evaluation)


## Exp. 12: train on B, test on A; 100% data; masked background
# Run parameters for the full-size data sets.
experiment.nRuns <- NRunsLarge
experiment.batchsize <- BatchSizeLarge
experiment.classes <- data.B.100p.masked.Classes
# Training/validation partitions come from splitting B; the `test` element is
# then set to the complete data set A (overwriting whatever the split put there).
experiment.data <- splitDLData(
  data.B.100p.masked,
  portionTrain = 0.8,
  seed = RandomSeed
)
experiment.data$test <- data.A.100p.masked
experiment.name <- paste0(experiment.model, ".Exp12.BonA.100p.masked")

# Train and evaluate (repeated experiment.nRuns times).
experiment.evaluation <- trainModelnTimes(
  projectName = experiment.name,
  nRuns = experiment.nRuns,
  modelType = experiment.model,
  dataTraining = experiment.data$train,
  dataValidation = experiment.data$validate,
  dataTest = experiment.data$test,
  nEpochs = Nepochs,
  batchSize = experiment.batchsize,
  classes = experiment.classes,
  seed = RandomSeed
)

# Append the evaluation to the result log.
logResults(model = experiment.model, evaluationData = experiment.evaluation)


## Exp. 13: train on A, test on B; 10% subset; unmasked background
# Run parameters for the 10% subsets.
experiment.nRuns <- NRunsSmall
experiment.batchsize <- BatchSizeSmall
experiment.classes <- data.A.10p.unmasked.Classes
# Training/validation partitions come from splitting the A subset; the `test`
# element is then set to the complete B subset (overwriting whatever the split put there).
experiment.data <- splitDLData(
  data.A.10p.unmasked,
  portionTrain = 0.8,
  seed = RandomSeed
)
experiment.data$test <- data.B.10p.unmasked
experiment.name <- paste0(experiment.model, ".Exp13.AonB.10p.unmasked")

# Train and evaluate (repeated experiment.nRuns times).
experiment.evaluation <- trainModelnTimes(
  projectName = experiment.name,
  nRuns = experiment.nRuns,
  modelType = experiment.model,
  dataTraining = experiment.data$train,
  dataValidation = experiment.data$validate,
  dataTest = experiment.data$test,
  nEpochs = Nepochs,
  batchSize = experiment.batchsize,
  classes = experiment.classes,
  seed = RandomSeed
)

# Append the evaluation to the result log.
logResults(model = experiment.model, evaluationData = experiment.evaluation)


## Exp. 14: train on B, test on A; 10% subset; unmasked background
# Run parameters for the 10% subsets.
experiment.nRuns <- NRunsSmall
experiment.batchsize <- BatchSizeSmall
experiment.classes <- data.B.10p.unmasked.Classes
# Training/validation partitions come from splitting the B subset; the `test`
# element is then set to the complete A subset (overwriting whatever the split put there).
experiment.data <- splitDLData(
  data.B.10p.unmasked,
  portionTrain = 0.8,
  seed = RandomSeed
)
experiment.data$test <- data.A.10p.unmasked
experiment.name <- paste0(experiment.model, ".Exp14.BonA.10p.unmasked")

# Train and evaluate (repeated experiment.nRuns times).
experiment.evaluation <- trainModelnTimes(
  projectName = experiment.name,
  nRuns = experiment.nRuns,
  modelType = experiment.model,
  dataTraining = experiment.data$train,
  dataValidation = experiment.data$validate,
  dataTest = experiment.data$test,
  nEpochs = Nepochs,
  batchSize = experiment.batchsize,
  classes = experiment.classes,
  seed = RandomSeed
)

# Append the evaluation to the result log.
logResults(model = experiment.model, evaluationData = experiment.evaluation)


## Exp. 15: train on A, test on B; 10% subset; masked background
# Run parameters for the 10% subsets.
experiment.nRuns <- NRunsSmall
experiment.batchsize <- BatchSizeSmall
experiment.classes <- data.A.10p.masked.Classes
# Training/validation partitions come from splitting the A subset; the `test`
# element is then set to the complete B subset (overwriting whatever the split put there).
experiment.data <- splitDLData(
  data.A.10p.masked,
  portionTrain = 0.8,
  seed = RandomSeed
)
experiment.data$test <- data.B.10p.masked
experiment.name <- paste0(experiment.model, ".Exp15.AonB.10p.masked")

# Train and evaluate (repeated experiment.nRuns times).
experiment.evaluation <- trainModelnTimes(
  projectName = experiment.name,
  nRuns = experiment.nRuns,
  modelType = experiment.model,
  dataTraining = experiment.data$train,
  dataValidation = experiment.data$validate,
  dataTest = experiment.data$test,
  nEpochs = Nepochs,
  batchSize = experiment.batchsize,
  classes = experiment.classes,
  seed = RandomSeed
)

# Append the evaluation to the result log.
logResults(model = experiment.model, evaluationData = experiment.evaluation)


## Exp. 16: train on B, test on A; 10% subset; masked background
# Run parameters for the 10% subsets.
experiment.nRuns <- NRunsSmall
experiment.batchsize <- BatchSizeSmall
experiment.classes <- data.B.10p.masked.Classes
# Training/validation partitions come from splitting the B subset; the `test`
# element is then set to the complete A subset (overwriting whatever the split put there).
experiment.data <- splitDLData(
  data.B.10p.masked,
  portionTrain = 0.8,
  seed = RandomSeed
)
experiment.data$test <- data.A.10p.masked
experiment.name <- paste0(experiment.model, ".Exp16.BonA.10p.masked")

# Train and evaluate (repeated experiment.nRuns times).
experiment.evaluation <- trainModelnTimes(
  projectName = experiment.name,
  nRuns = experiment.nRuns,
  modelType = experiment.model,
  dataTraining = experiment.data$train,
  dataValidation = experiment.data$validate,
  dataTest = experiment.data$test,
  nEpochs = Nepochs,
  batchSize = experiment.batchsize,
  classes = experiment.classes,
  seed = RandomSeed
)

# Append the evaluation to the result log.
logResults(model = experiment.model, evaluationData = experiment.evaluation)



