From 9a867d940679498c77a32be9c90f81200019b821 Mon Sep 17 00:00:00 2001
From: jranke <jranke@edb9625f-4e0d-4859-8d74-9fd3b1da38cb>
Date: Thu, 10 Oct 2013 13:42:22 +0000
Subject: - IRLS is implemented

git-svn-id: svn+ssh://svn.r-forge.r-project.org/svnroot/kinfit/pkg/mkin@109 edb9625f-4e0d-4859-8d74-9fd3b1da38cb
---
 ChangeLog      |  1 +
 DESCRIPTION    |  4 ++--
 R/mkinerrmin.R |  4 ++--
 R/mkinfit.R    | 72 ++++++++++++++++++++++++++++++++++++++++++++++------------
 TODO           |  1 +
 man/mkinfit.Rd | 64 ++++++++++++++++++++++++++++++++++++++++++++-------
 6 files changed, 120 insertions(+), 26 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 5f430343..3aabc99d 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -2,6 +2,7 @@
 
 	* Show the weighting method for residuals in the summary
 	* Correct the output of the data in the case of manual weighting
+	* Implement IRLS assuming different variances for observed variables
 
 
 2013-10-09  Johannes Ranke <jranke@uni-bremen.de> for mkin (0.9-22)
diff --git a/DESCRIPTION b/DESCRIPTION
index 4c6f2a4b..30ca6121 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -2,8 +2,8 @@ Package: mkin
 Type: Package
 Title: Routines for fitting kinetic models with one or more state
   variables to chemical degradation data
-Version: 0.9-21
-Date: 2013-10-08
+Version: 0.9-22
+Date: 2013-10-10
 Author: Johannes Ranke, with contributions from Katrin Lindenberger, René Lehmann
 Maintainer: Johannes Ranke <jranke@uni-bremen.de>
 Description: Calculation routines based on the FOCUS Kinetics Report (2006).
diff --git a/R/mkinerrmin.R b/R/mkinerrmin.R
index 2b499d97..7d9d4626 100644
--- a/R/mkinerrmin.R
+++ b/R/mkinerrmin.R
@@ -17,13 +17,13 @@
 
 # You should have received a copy of the GNU General Public License along with
 # this program. If not, see <http://www.gnu.org/licenses/>
-if(getRversion() >= '2.15.1') utils::globalVariables(c("name"))
+if(getRversion() >= '2.15.1') utils::globalVariables(c("name", "value_mean"))
 
 mkinerrmin <- function(fit, alpha = 0.05)
 {
   parms.optim <- fit$par
   kinerrmin <- function(errdata, n.parms) {
-    means.mean <- mean(errdata$value_mean, na.rm=TRUE)
+    means.mean <- mean(errdata$value_mean, na.rm = TRUE)
     df = length(errdata$value_mean) - n.parms
   
     f <- function(err)
diff --git a/R/mkinfit.R b/R/mkinfit.R
index e7b1fb0c..b46184b7 100644
--- a/R/mkinfit.R
+++ b/R/mkinfit.R
@@ -33,6 +33,8 @@ mkinfit <- function(mkinmod, observed,
   plot = FALSE, quiet = FALSE,
   err = NULL, weight = "none", scaleVar = FALSE,
   atol = 1e-8, rtol = 1e-10, n.outtimes = 100,
+  reweight.method = NULL,
+  reweight.tol = 1e-8, reweight.max.iter = 10,
   trace_parms = FALSE,
   ...)
 {
@@ -125,7 +127,7 @@ mkinfit <- function(mkinmod, observed,
   {
     assign("calls", calls+1, inherits=TRUE) # Increase the model solution counter
 
-    # Trace parameter values if quiet is off
+    # Trace parameter values if requested
     if(trace_parms) cat(P, "\n")
 
     # Time points at which observed data are available
@@ -183,9 +185,44 @@ mkinfit <- function(mkinmod, observed,
     }
     return(mC)
   }
+
   fit <- modFit(cost, c(state.ini.optim, parms.optim), 
                 method = method.modFit, control = control.modFit, ...)
 
+  # Reiterate the fit until convergence of the variance components (IRLS)
+  # if requested by the user
+  weight.ini = weight
+  if (!is.null(err)) weight.ini = "manual"
+
+  if (!is.null(reweight.method)) {
+    if (reweight.method != "obs") stop("Only reweighting method 'obs' is implemented")
+    if(!quiet) {
+      cat("IRLS based on variance estimates for each observed variable\n")
+    }
+    if (!quiet) {
+      cat("Initial variance estimates are:\n")
+      print(signif(fit$var_ms_unweighted, 8))
+    }
+    reweight.diff = 1
+    n.iter <- 0
+    if (!is.null(err)) observed$err.ini <- observed[[err]]
+    err = "err.irls"
+    while (reweight.diff > reweight.tol & n.iter < reweight.max.iter) {
+      n.iter <- n.iter + 1
+      sigma.old <- sqrt(fit$var_ms_unweighted)
+      observed[err] <- sqrt(fit$var_ms_unweighted)[observed$name]
+      fit <- modFit(cost, fit$par, method = method.modFit,
+                    control = control.modFit, ...)
+      reweight.diff = sum((sqrt(fit$var_ms_unweighted) - sigma.old)^2)
+      if (!quiet) {
+        cat("Iteration", n.iter, "yields variance estimates:\n")
+        print(signif(fit$var_ms_unweighted, 8))
+        cat("Sum of squared differences to last variance estimates:",
+            signif(reweight.diff, 2), "\n")
+      }
+    }
+  }
+
   # We need to return some more data for summary and plotting
   fit$solution_type <- solution_type
 
@@ -214,19 +251,23 @@ mkinfit <- function(mkinmod, observed,
                                         mod_vars)
   bparms.all = c(bparms.optim, bparms.fixed)
 
-  # Collect observed, predicted and residuals
+  # Collect observed, predicted, residuals and weighting
   data <- merge(fit$observed, fit$predicted, by = c("time", "name"))
-  if (is.null(err)) {
-    names(data) <- c("time", "variable", "observed", "predicted")
-  } else {
-    names(data) <- c("time", "variable", "observed", "err", "predicted")
-  }
-  data$residual <- data$observed - data$predicted
-  data$variable <- ordered(data$variable, levels = obs_vars)
-  fit$data <- data[order(data$variable, data$time), ]
+  data$name <- ordered(data$name, levels = obs_vars)
+  data <- data[order(data$name, data$time), ]
+
+  fit$data <- data.frame(time = data$time,
+                         variable = data$name,
+                         observed = data$value.x,
+                         predicted = data$value.y)
+  fit$data$residual <- fit$data$observed - fit$data$predicted
+  if (!is.null(data$err.ini)) fit$data$err.ini <- data$err.ini
+  if (!is.null(err)) fit$data[[err]] <- data[[err]]
+
   fit$atol <- atol
   fit$rtol <- rtol
-  fit$weight <- ifelse(is.null(err), weight, "manual")
+  fit$weight.ini <- weight.ini
+  fit$reweight.method <- reweight.method
 
   # Return all backtransformed parameters for summary
   fit$bparms.optim <- bparms.optim 
@@ -282,7 +323,8 @@ summary.mkinfit <- function(object, data = TRUE, distimes = TRUE, alpha = 0.05,
 	  date.summary = date(),
 	  solution_type = object$solution_type,
 	  use_of_ff = object$mkinmod$use_of_ff,
-    weight = object$weight,
+    weight.ini = object$weight.ini,
+    reweight.method = object$reweight.method,
     residuals = object$residuals,
     residualVariance = resvar,
     sigma = sqrt(resvar),
@@ -329,8 +371,10 @@ print.summary.mkinfit <- function(x, digits = max(3, getOption("digits") - 3), .
   cat("\nMethod used for solution of differential equation system:\n")
   cat(x$solution_type, "\n")
 
-  cat("\nWeighting method:\n")
-  cat(x$weight, "\n")
+  cat("\nWeighting:", x$weight.ini)
+  if(!is.null(x$reweight.method)) cat(" then iterative reweighting method",
+                                      x$reweight.method)
+  cat("\n")
 
   cat("\nStarting values for optimised parameters:\n")
   print(x$start)
diff --git a/TODO b/TODO
index 25f5478b..a56ca55f 100644
--- a/TODO
+++ b/TODO
@@ -3,6 +3,7 @@ TODO for version 1.0
   for the user - they will be tested during R CMD check anyway
 - Let mkin call mkin_wide_to_long automatically, if observed data are suitably defined
 - Think about what a user would expect from version 1.0
+- Check the chi2 error calculation in mkinerrmin.R. KinGUII does this without iteration
 
 Nice to have:
 - Improve choice of starting parameters, in order to avoid failure of eigenvalue based solutions
diff --git a/man/mkinfit.Rd b/man/mkinfit.Rd
index 445bce2e..a080f8ae 100644
--- a/man/mkinfit.Rd
+++ b/man/mkinfit.Rd
@@ -20,7 +20,9 @@ mkinfit(mkinmod, observed,
   plot = FALSE, quiet = FALSE, err = NULL, weight = "none", 
   scaleVar = FALSE, 
   atol = 1e-8, rtol = 1e-10, n.outtimes = 100, 
-  trace_parms, ...)
+  reweight.method = NULL,
+  reweight.tol = 1e-8, reweight.max.iter = 10,
+  trace_parms = FALSE, ...)
 }
 \arguments{
   \item{mkinmod}{
@@ -94,7 +96,7 @@ mkinfit(mkinmod, observed,
     \emph{error} estimates, used to weigh the residuals (see details of
     \code{\link{modCost}}); if \code{NULL}, then the residuals are not weighed.
   }
-  \item{weight}{only if \code{err}=\code{NULL}: how to weigh the
+  \item{weight}{only if \code{err}=\code{NULL}: how to weight the
     residuals, one of "none", "std", "mean", see details of \code{\link{modCost}}.
   }
   \item{scaleVar}{
@@ -115,6 +117,23 @@ mkinfit(mkinmod, observed,
     the numerical solver if that is used (see \code{solution} argument. 
     The default value is 100.
   }
+  \item{reweight.method}{
+    The method used for iteratively reweighting residuals, also known
+    as iteratively reweighted least squares (IRLS). Default is NULL,
+    the other method implemented is called "obs", meaning that each
+    observed variable is assumed to have its own variance, this is 
+    estimated from the fit and used for weighting the residuals
+    in each iteration until convergence of this estimate up to 
+    \code{reweight.tol} or up to the maximum number of iterations
+    specified by \code{reweight.maxiter}.
+  }
+  \item{reweight.tol}{
+    Tolerance for convergence criterion for the variance components
+    in IRLS fits.
+  }
+  \item{reweight.max.iter}{
+    Maximum iterations in IRLS fits.
+  }
   \item{trace_parms}{
     Should a trace of the parameter values be listed?
   }
@@ -123,8 +142,14 @@ mkinfit(mkinmod, observed,
   }
 }
 \value{
-    A list with "mkinfit" and "modFit" in the class attribute. 
-    A summary can be obtained by \code{\link{summary.mkinfit}}. 
+  A list with "mkinfit" and "modFit" in the class attribute. 
+  A summary can be obtained by \code{\link{summary.mkinfit}}. 
+}
+\note{
+  The implementation of iteratively reweighted least squares is inspired by the
+  work of the KinGUII team at Bayer Crop Science (Walter Schmitt and Zhenglei
+  Gao). A similar implemention can also be found in CAKE 2.0, which is the
+  other GUI derivative of mkin, sponsored by Syngenta.
 }
 \author{
   Johannes Ranke <jranke@uni-bremen.de>
@@ -136,7 +161,6 @@ SFO_SFO <- mkinmod(
   m1 = list(type = "SFO"))
 # Fit the model to the FOCUS example dataset D using defaults
 fit <- mkinfit(SFO_SFO, FOCUS_2006_D)
-str(fit)
 summary(fit)
 
 # Use stepwise fitting, using optimised parameters from parent only fit, FOMC
@@ -147,9 +171,10 @@ FOMC_SFO <- mkinmod(
   m1 = list(type = "SFO"))
 # Fit the model to the FOCUS example dataset D using defaults
 fit.FOMC_SFO <- mkinfit(FOMC_SFO, FOCUS_2006_D)
-# Use starting parameters from parent only FOMC fit (not really needed in this case)
-fit.FOMC = mkinfit(FOMC, FOCUS_2006_D)
-fit.FOMC_SFO <- mkinfit(FOMC_SFO, FOCUS_2006_D, parms.ini = fit.FOMC$bparms.ode, plot=TRUE)
+# Use starting parameters from parent only FOMC fit
+fit.FOMC = mkinfit(FOMC, FOCUS_2006_D, plot=TRUE)
+fit.FOMC_SFO <- mkinfit(FOMC_SFO, FOCUS_2006_D, 
+  parms.ini = fit.FOMC$bparms.ode, plot=TRUE)
 }
 
 # Use stepwise fitting, using optimised parameters from parent only fit, SFORB
@@ -162,6 +187,29 @@ fit.SFORB_SFO <- mkinfit(SFORB_SFO, FOCUS_2006_D)
 # Use starting parameters from parent only SFORB fit (not really needed in this case)
 fit.SFORB = mkinfit(SFORB, FOCUS_2006_D)
 fit.SFORB_SFO <- mkinfit(SFORB_SFO, FOCUS_2006_D, parms.ini = fit.SFORB$bparms.ode, plot=TRUE)
+
+# Weighted fits, including IRLS
+SFO_SFO.ff <- mkinmod(parent = list(type = "SFO", to = "m1"),
+                      m1 = list(type = "SFO"), use_of_ff = "max")
+f.noweight <- mkinfit(SFO_SFO.ff, FOCUS_2006_D)
+summary(f.noweight)
+f.irls <- mkinfit(SFO_SFO.ff, FOCUS_2006_D, reweight.method = "obs")
+summary(f.irls)
+f.w.mean <- mkinfit(SFO_SFO.ff, FOCUS_2006_D, weight = "mean")
+summary(f.w.mean)
+f.w.mean.irls <- mkinfit(SFO_SFO.ff, FOCUS_2006_D, weight = "mean",
+                         reweight.method = "obs")
+summary(f.w.mean.irls)
+
+# Manual weighting
+dw <- FOCUS_2006_D
+errors <- c(parent = 2, m1 = 1)
+dw$err.man <- errors[FOCUS_2006_D$name]
+f.w.man <- mkinfit(SFO_SFO.ff, dw, err = "err.man")
+summary(f.w.man)
+f.w.man.irls <- mkinfit(SFO_SFO.ff, dw, err = "err.man",
+                       reweight.method = "obs")
+summary(f.w.man.irls)
 }
 \keyword{ models }
 \keyword{ optimize }
-- 
cgit v1.2.3