From 9a867d940679498c77a32be9c90f81200019b821 Mon Sep 17 00:00:00 2001 From: jranke Date: Thu, 10 Oct 2013 13:42:22 +0000 Subject: - IRLS is implemented git-svn-id: svn+ssh://svn.r-forge.r-project.org/svnroot/kinfit/pkg/mkin@109 edb9625f-4e0d-4859-8d74-9fd3b1da38cb --- ChangeLog | 1 + DESCRIPTION | 4 ++-- R/mkinerrmin.R | 4 ++-- R/mkinfit.R | 72 ++++++++++++++++++++++++++++++++++++++++++++++------------ TODO | 1 + man/mkinfit.Rd | 64 ++++++++++++++++++++++++++++++++++++++++++++------- 6 files changed, 120 insertions(+), 26 deletions(-) diff --git a/ChangeLog b/ChangeLog index 5f43034..3aabc99 100644 --- a/ChangeLog +++ b/ChangeLog @@ -2,6 +2,7 @@ * Show the weighting method for residuals in the summary * Correct the output of the data in the case of manual weighting + * Implement IRLS assuming different variances for observed variables 2013-10-09 Johannes Ranke for mkin (0.9-22) diff --git a/DESCRIPTION b/DESCRIPTION index 4c6f2a4..30ca612 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -2,8 +2,8 @@ Package: mkin Type: Package Title: Routines for fitting kinetic models with one or more state variables to chemical degradation data -Version: 0.9-21 -Date: 2013-10-08 +Version: 0.9-22 +Date: 2013-10-10 Author: Johannes Ranke, with contributions from Katrin Lindenberger, René Lehmann Maintainer: Johannes Ranke Description: Calculation routines based on the FOCUS Kinetics Report (2006). diff --git a/R/mkinerrmin.R b/R/mkinerrmin.R index 2b499d9..7d9d462 100644 --- a/R/mkinerrmin.R +++ b/R/mkinerrmin.R @@ -17,13 +17,13 @@ # You should have received a copy of the GNU General Public License along with # this program. If not, see -if(getRversion() >= '2.15.1') utils::globalVariables(c("name")) +if(getRversion() >= '2.15.1') utils::globalVariables(c("name", "value_mean")) mkinerrmin <- function(fit, alpha = 0.05) { parms.optim <- fit$par kinerrmin <- function(errdata, n.parms) { - means.mean <- mean(errdata$value_mean, na.rm=TRUE) + means.mean <- mean(errdata$value_mean, na.rm = TRUE) df = length(errdata$value_mean) - n.parms f <- function(err) diff --git a/R/mkinfit.R b/R/mkinfit.R index e7b1fb0..b46184b 100644 --- a/R/mkinfit.R +++ b/R/mkinfit.R @@ -33,6 +33,8 @@ mkinfit <- function(mkinmod, observed, plot = FALSE, quiet = FALSE, err = NULL, weight = "none", scaleVar = FALSE, atol = 1e-8, rtol = 1e-10, n.outtimes = 100, + reweight.method = NULL, + reweight.tol = 1e-8, reweight.max.iter = 10, trace_parms = FALSE, ...) { @@ -125,7 +127,7 @@ mkinfit <- function(mkinmod, observed, { assign("calls", calls+1, inherits=TRUE) # Increase the model solution counter - # Trace parameter values if quiet is off + # Trace parameter values if requested if(trace_parms) cat(P, "\n") # Time points at which observed data are available @@ -183,9 +185,44 @@ mkinfit <- function(mkinmod, observed, } return(mC) } + fit <- modFit(cost, c(state.ini.optim, parms.optim), method = method.modFit, control = control.modFit, ...) + # Reiterate the fit until convergence of the variance components (IRLS) + # if requested by the user + weight.ini = weight + if (!is.null(err)) weight.ini = "manual" + + if (!is.null(reweight.method)) { + if (reweight.method != "obs") stop("Only reweighting method 'obs' is implemented") + if(!quiet) { + cat("IRLS based on variance estimates for each observed variable\n") + } + if (!quiet) { + cat("Initial variance estimates are:\n") + print(signif(fit$var_ms_unweighted, 8)) + } + reweight.diff = 1 + n.iter <- 0 + if (!is.null(err)) observed$err.ini <- observed[[err]] + err = "err.irls" + while (reweight.diff > reweight.tol & n.iter < reweight.max.iter) { + n.iter <- n.iter + 1 + sigma.old <- sqrt(fit$var_ms_unweighted) + observed[err] <- sqrt(fit$var_ms_unweighted)[observed$name] + fit <- modFit(cost, fit$par, method = method.modFit, + control = control.modFit, ...) + reweight.diff = sum((sqrt(fit$var_ms_unweighted) - sigma.old)^2) + if (!quiet) { + cat("Iteration", n.iter, "yields variance estimates:\n") + print(signif(fit$var_ms_unweighted, 8)) + cat("Sum of squared differences to last variance estimates:", + signif(reweight.diff, 2), "\n") + } + } + } + # We need to return some more data for summary and plotting fit$solution_type <- solution_type @@ -214,19 +251,23 @@ mkinfit <- function(mkinmod, observed, mod_vars) bparms.all = c(bparms.optim, bparms.fixed) - # Collect observed, predicted and residuals + # Collect observed, predicted, residuals and weighting data <- merge(fit$observed, fit$predicted, by = c("time", "name")) - if (is.null(err)) { - names(data) <- c("time", "variable", "observed", "predicted") - } else { - names(data) <- c("time", "variable", "observed", "err", "predicted") - } - data$residual <- data$observed - data$predicted - data$variable <- ordered(data$variable, levels = obs_vars) - fit$data <- data[order(data$variable, data$time), ] + data$name <- ordered(data$name, levels = obs_vars) + data <- data[order(data$name, data$time), ] + + fit$data <- data.frame(time = data$time, + variable = data$name, + observed = data$value.x, + predicted = data$value.y) + fit$data$residual <- fit$data$observed - fit$data$predicted + if (!is.null(data$err.ini)) fit$data$err.ini <- data$err.ini + if (!is.null(err)) fit$data[[err]] <- data[[err]] + fit$atol <- atol fit$rtol <- rtol - fit$weight <- ifelse(is.null(err), weight, "manual") + fit$weight.ini <- weight.ini + fit$reweight.method <- reweight.method # Return all backtransformed parameters for summary fit$bparms.optim <- bparms.optim @@ -282,7 +323,8 @@ summary.mkinfit <- function(object, data = TRUE, distimes = TRUE, alpha = 0.05, date.summary = date(), solution_type = object$solution_type, use_of_ff = object$mkinmod$use_of_ff, - weight = object$weight, + weight.ini = object$weight.ini, + reweight.method = object$reweight.method, residuals = object$residuals, residualVariance = resvar, sigma = sqrt(resvar), @@ -329,8 +371,10 @@ print.summary.mkinfit <- function(x, digits = max(3, getOption("digits") - 3), . cat("\nMethod used for solution of differential equation system:\n") cat(x$solution_type, "\n") - cat("\nWeighting method:\n") - cat(x$weight, "\n") + cat("\nWeighting:", x$weight.ini) + if(!is.null(x$reweight.method)) cat(" then iterative reweighting method", + x$reweight.method) + cat("\n") cat("\nStarting values for optimised parameters:\n") print(x$start) diff --git a/TODO b/TODO index 25f5478..a56ca55 100644 --- a/TODO +++ b/TODO @@ -3,6 +3,7 @@ TODO for version 1.0 for the user - they will be tested during R CMD check anyway - Let mkin call mkin_wide_to_long automatically, if observed data are suitably defined - Think about what a user would expect from version 1.0 +- Check the chi2 error calculation in mkinerrmin.R. KinGUII does this without iteration Nice to have: - Improve choice of starting parameters, in order to avoid failure of eigenvalue based solutions diff --git a/man/mkinfit.Rd b/man/mkinfit.Rd index 445bce2..a080f8a 100644 --- a/man/mkinfit.Rd +++ b/man/mkinfit.Rd @@ -20,7 +20,9 @@ mkinfit(mkinmod, observed, plot = FALSE, quiet = FALSE, err = NULL, weight = "none", scaleVar = FALSE, atol = 1e-8, rtol = 1e-10, n.outtimes = 100, - trace_parms, ...) + reweight.method = NULL, + reweight.tol = 1e-8, reweight.max.iter = 10, + trace_parms = FALSE, ...) } \arguments{ \item{mkinmod}{ @@ -94,7 +96,7 @@ mkinfit(mkinmod, observed, \emph{error} estimates, used to weigh the residuals (see details of \code{\link{modCost}}); if \code{NULL}, then the residuals are not weighed. } - \item{weight}{only if \code{err}=\code{NULL}: how to weigh the + \item{weight}{only if \code{err}=\code{NULL}: how to weight the residuals, one of "none", "std", "mean", see details of \code{\link{modCost}}. } \item{scaleVar}{ @@ -115,6 +117,23 @@ mkinfit(mkinmod, observed, the numerical solver if that is used (see \code{solution} argument. The default value is 100. } + \item{reweight.method}{ + The method used for iteratively reweighting residuals, also known + as iteratively reweighted least squares (IRLS). Default is NULL, + the other method implemented is called "obs", meaning that each + observed variable is assumed to have its own variance, this is + estimated from the fit and used for weighting the residuals + in each iteration until convergence of this estimate up to + \code{reweight.tol} or up to the maximum number of iterations + specified by \code{reweight.maxiter}. + } + \item{reweight.tol}{ + Tolerance for convergence criterion for the variance components + in IRLS fits. + } + \item{reweight.max.iter}{ + Maximum iterations in IRLS fits. + } \item{trace_parms}{ Should a trace of the parameter values be listed? } @@ -123,8 +142,14 @@ mkinfit(mkinmod, observed, } } \value{ - A list with "mkinfit" and "modFit" in the class attribute. - A summary can be obtained by \code{\link{summary.mkinfit}}. + A list with "mkinfit" and "modFit" in the class attribute. + A summary can be obtained by \code{\link{summary.mkinfit}}. +} +\note{ + The implementation of iteratively reweighted least squares is inspired by the + work of the KinGUII team at Bayer Crop Science (Walter Schmitt and Zhenglei + Gao). A similar implemention can also be found in CAKE 2.0, which is the + other GUI derivative of mkin, sponsored by Syngenta. } \author{ Johannes Ranke @@ -136,7 +161,6 @@ SFO_SFO <- mkinmod( m1 = list(type = "SFO")) # Fit the model to the FOCUS example dataset D using defaults fit <- mkinfit(SFO_SFO, FOCUS_2006_D) -str(fit) summary(fit) # Use stepwise fitting, using optimised parameters from parent only fit, FOMC @@ -147,9 +171,10 @@ FOMC_SFO <- mkinmod( m1 = list(type = "SFO")) # Fit the model to the FOCUS example dataset D using defaults fit.FOMC_SFO <- mkinfit(FOMC_SFO, FOCUS_2006_D) -# Use starting parameters from parent only FOMC fit (not really needed in this case) -fit.FOMC = mkinfit(FOMC, FOCUS_2006_D) -fit.FOMC_SFO <- mkinfit(FOMC_SFO, FOCUS_2006_D, parms.ini = fit.FOMC$bparms.ode, plot=TRUE) +# Use starting parameters from parent only FOMC fit +fit.FOMC = mkinfit(FOMC, FOCUS_2006_D, plot=TRUE) +fit.FOMC_SFO <- mkinfit(FOMC_SFO, FOCUS_2006_D, + parms.ini = fit.FOMC$bparms.ode, plot=TRUE) } # Use stepwise fitting, using optimised parameters from parent only fit, SFORB @@ -162,6 +187,29 @@ fit.SFORB_SFO <- mkinfit(SFORB_SFO, FOCUS_2006_D) # Use starting parameters from parent only SFORB fit (not really needed in this case) fit.SFORB = mkinfit(SFORB, FOCUS_2006_D) fit.SFORB_SFO <- mkinfit(SFORB_SFO, FOCUS_2006_D, parms.ini = fit.SFORB$bparms.ode, plot=TRUE) + +# Weighted fits, including IRLS +SFO_SFO.ff <- mkinmod(parent = list(type = "SFO", to = "m1"), + m1 = list(type = "SFO"), use_of_ff = "max") +f.noweight <- mkinfit(SFO_SFO.ff, FOCUS_2006_D) +summary(f.noweight) +f.irls <- mkinfit(SFO_SFO.ff, FOCUS_2006_D, reweight.method = "obs") +summary(f.irls) +f.w.mean <- mkinfit(SFO_SFO.ff, FOCUS_2006_D, weight = "mean") +summary(f.w.mean) +f.w.mean.irls <- mkinfit(SFO_SFO.ff, FOCUS_2006_D, weight = "mean", + reweight.method = "obs") +summary(f.w.mean.irls) + +# Manual weighting +dw <- FOCUS_2006_D +errors <- c(parent = 2, m1 = 1) +dw$err.man <- errors[FOCUS_2006_D$name] +f.w.man <- mkinfit(SFO_SFO.ff, dw, err = "err.man") +summary(f.w.man) +f.w.man.irls <- mkinfit(SFO_SFO.ff, dw, err = "err.man", + reweight.method = "obs") +summary(f.w.man.irls) } \keyword{ models } \keyword{ optimize } -- cgit v1.2.1