From fd9ea462ba438c023e33902449429eae6d2dc28f Mon Sep 17 00:00:00 2001 From: Johannes Ranke Date: Wed, 9 Nov 2022 10:08:37 +0100 Subject: Version 3.5, from a fresh installation --- CakeOlsFit.R | 185 ++++++++++++++++++++++------------------------------------- 1 file changed, 69 insertions(+), 116 deletions(-) mode change 100644 => 100755 CakeOlsFit.R (limited to 'CakeOlsFit.R') diff --git a/CakeOlsFit.R b/CakeOlsFit.R old mode 100644 new mode 100755 index e9b8f3c..66bf9b4 --- a/CakeOlsFit.R +++ b/CakeOlsFit.R @@ -7,7 +7,8 @@ # from the FME package, v 1.1 by Soetart and Petzoldt, which was in turn # inspired by summary.nls.lm -# Modifications developed by Tessella for Syngenta: Copyright (C) 2011-2020 Syngenta +# Modifications developed by Hybrid Intelligence (formerly Tessella), part of +# Capgemini Engineering, for Syngenta, Copyright (C) 2011-2022 Syngenta # Tessella Project Reference: 6245, 7247, 8361, 7414, 10091 # # The CAKE R modules are free software: you can redistribute it and/or modify @@ -39,8 +40,8 @@ # dfopDtMaxIter: The maximum number of iterations to apply to DFOP DT calculation. CakeOlsFit <- function(cake.model, observed, - parms.ini = rep(0.1, length(cake.model$parms)), - state.ini = c(100, rep(0, length(cake.model$diffs) - 1)), + parms.ini, + state.ini, lower = 0, upper = Inf, fixed_parms = NULL, @@ -48,126 +49,78 @@ CakeOlsFit <- function(cake.model, quiet = FALSE, atol = 1e-6, dfopDtMaxIter = 10000, - ...) -{ - mod_vars <- names(cake.model$diffs) - # Subset dataframe with mapped (modelled) variables - observed <- subset(observed, name %in% names(cake.model$map)) - # Get names of observed variables - obs_vars <- unique(as.character(observed$name)) - - # Name the parameters if they are not named yet - if(is.null(names(parms.ini))) names(parms.ini) <- cake.model$parms - - # Name the inital parameter values if they are not named yet - if(is.null(names(state.ini))) names(state.ini) <- mod_vars - - # Parameters to be optimised - parms.fixed <- parms.ini[fixed_parms] - optim_parms <- setdiff(names(parms.ini), fixed_parms) - parms.optim <- parms.ini[optim_parms] - - state.ini.fixed <- state.ini[fixed_initials] - optim_initials <- setdiff(names(state.ini), fixed_initials) - state.ini.optim <- state.ini[optim_initials] - state.ini.optim.boxnames <- names(state.ini.optim) - if(length(state.ini.optim) > 0) { - names(state.ini.optim) <- paste(names(state.ini.optim), "0", sep="_") - } - - # Decide if the solution of the model can be based on a simple analytical - # formula, the spectral decomposition of the matrix (fundamental system) - # or a numeric ode solver from the deSolve package - if (length(cake.model$map) == 1) { - solution <- "analytical" - } else { - solution <- "deSolve" - } - - # Create a function calculating the differentials specified by the model - # if necessary - if(solution == "deSolve") { - mkindiff <- function(t, state, parms) { - time <- t - diffs <- vector() - for (box in mod_vars) - { - diffname <- paste("d", box, sep="_") - diffs[diffname] <- with(as.list(c(time,state, parms)), - eval(parse(text=cake.model$diffs[[box]]))) - } - return(list(c(diffs))) - } - } - - costFunctions <- CakeInternalCostFunctions(cake.model, state.ini.optim, state.ini.optim.boxnames, state.ini.fixed, - parms.fixed, observed, mkindiff = mkindiff, quiet, atol=atol, solution=solution, err=NULL) - - # modFit parameter transformations can explode if you put in parameters that are equal to a bound, so we move them away by a tiny amount. - all.optim <- ShiftAwayFromBoundaries(c(state.ini.optim, parms.optim), lower, upper) - - fit <-modFit(costFunctions$cost, all.optim, lower = lower, upper = upper, ...) - - # We need to return some more data for summary and plotting - fit$solution <- solution - - if (solution == "deSolve") { - fit$mkindiff <- mkindiff - } - - # We also need various other information for summary and plotting - fit$map <- cake.model$map - fit$diffs <- cake.model$diffs - - # Collect initial parameter values in two dataframes - fit$start <- data.frame(initial = c(state.ini.optim, parms.optim)) - fit$start$type <- c(rep("state", length(state.ini.optim)), rep("deparm", length(parms.optim))) - fit$start$lower <- lower - fit$start$upper <- upper - - fit$fixed <- data.frame(value = c(state.ini.fixed, parms.fixed)) - fit$fixed$type <- c(rep("state", length(state.ini.fixed)), rep("deparm", length(parms.fixed))) + control = list(), + useExtraSolver = FALSE) { + fit <- CakeFit("OLS", + cake.model, + observed, + parms.ini, + state.ini, + lower, + upper, + fixed_parms, + fixed_initials, + quiet, + atol = atol, + dfopDtMaxIter = dfopDtMaxIter, + control = control, + useExtraSolver = useExtraSolver) + + return(fit) +} - # Ensure initial state is at time 0 - obstimes <- unique(c(0,observed$time)) +GetOlsSpecificSetup <- function() { + return(function( + cake.model, + state.ini.optim, + state.ini.optim.boxnames, + state.ini.fixed, + parms.fixed, + observed, + mkindiff, + quiet, + atol, + solution, + ...) { + costFunctions <- CakeInternalCostFunctions(cake.model, state.ini.optim, state.ini.optim.boxnames, state.ini.fixed, + parms.fixed, observed, mkindiff = mkindiff, quiet, atol = atol, solution = solution, err = NULL) + return(list(costFunctions = costFunctions, costWithStatus = NULL)) + }) +} - out_predicted <- CakeOdeSolve(fit, obstimes, solution = solution, atol) - out_transformed <- PostProcessOdeOutput(out_predicted, cake.model, atol) +GetOlsOptimisationRoutine <- function() { + return(function(costFunctions, costForExtraSolver, useExtraSolver, parms, lower, upper, control, ...) { + fitStepResult <- RunFitStep(costFunctions$cost, costForExtraSolver, useExtraSolver, parms, lower, upper, control) + fit <- fitStepResult$fit + fitted_with_extra_solver <- fitStepResult$fitted_with_extra_solver - predicted_long <- wide_to_long(out_transformed, time = "time") - fit$predicted <- out_transformed + if (fitted_with_extra_solver) { + fit <- GetFitValuesAfterExtraSolver(fit, FF) - # Calculate chi2 error levels according to FOCUS (2006) - fit$errmin <- CakeChi2(cake.model, observed, predicted_long, obs_vars, parms.optim, state.ini.optim, state.ini, parms.ini, fit$fixed) + np <- length(parms) + fit$rank <- np + fit$df.residual <- length(fit$residuals) - fit$rank - # Calculate dissipation times DT50 and DT90 and formation fractions - parms.all <- c(fit$par, parms.fixed) - fit$distimes <- data.frame(DT50 = rep(NA, length(obs_vars)), DT90 = rep(NA, length(obs_vars)), - row.names = obs_vars) - fit$extraDT50<- data.frame(k1 = rep(NA, length(names(cake.model$map))), k2 = rep(NA, length(names(cake.model$map))), row.names = names(cake.model$map)) + # solnp can return an incorrect Hessian, so we use another fitting method at the optimised point to determine the Hessian + fit <- modFit(costFunctions$cost, fit$par, lower = lower, upper = upper, method = 'L-BFGS-B', control = list()) + } + return(list(fit = fit, fitted_with_extra_solver = fitted_with_extra_solver, res = NULL)) + }) +} - for (compartment.name in names(cake.model$map)) { - type <- names(cake.model$map[[compartment.name]])[1] +GetOlsSpecificWrapUp <- function() { + return(function(fit, ...) { + args <- list(...) + parms.fixed <- args$parms.fixed + observed <- args$observed - fit$distimes[compartment.name, ] <- CakeDT(type,compartment.name,parms.all,dfopDtMaxIter) - fit$extraDT50[compartment.name, ] <- CakeExtraDT(type, compartment.name, parms.all) - } + parms.all <- c(fit$par, parms.fixed) - fit$ioreRepDT <- CakeIORERepresentativeDT("Parent", parms.all) - fit$fomcRepDT <- CakeFOMCBackCalculatedDT(parms.all) - fit$penalties <- CakePenaltiesLong(parms.all, out_transformed, observed) + data <- observed + data$err <- rep(NA, length(data$time)) - # Collect observed, predicted and residuals - data<-observed - data$err<-rep(NA,length(data$time)) - data <- merge(data, predicted_long, by = c("time", "name")) - names(data)<-c("time", "variable", "observed","err-var", "predicted") - data$residual <- data$observed - data$predicted - data$variable <- ordered(data$variable, levels = obs_vars) - fit$data <- data[order(data$variable, data$time), ] - fit$atol <- atol - fit$topology <- cake.model$topology + class(fit) <- c("CakeFit", "mkinfit", "modFit") - class(fit) <- c("CakeFit", "mkinfit", "modFit") - return(fit) -} + return(list(fit = fit, parms.all = parms.all, data = data)) + }) +} \ No newline at end of file -- cgit v1.2.1