From 16f5b1d3c0136413e92b2be0f20d365e92e9cd1c Mon Sep 17 00:00:00 2001 From: jranke Date: Tue, 18 May 2010 12:58:38 +0000 Subject: Much more complete version that was just submitted to CRAN. git-svn-id: svn+ssh://svn.r-forge.r-project.org/svnroot/kinfit/pkg/mkin@9 edb9625f-4e0d-4859-8d74-9fd3b1da38cb --- R/mkin_long_to_wide.R | 9 ++ R/mkin_wide_to_long.R | 13 +++ R/mkinerrmin.R | 14 +++ R/mkinfit.R | 240 ++++++++++++++++++++++++++++++++++++++++++++++---- R/mkinmod.R | 66 ++++++++++---- R/mkinplot.R | 49 +++++++++++ 6 files changed, 356 insertions(+), 35 deletions(-) create mode 100644 R/mkin_long_to_wide.R create mode 100644 R/mkin_wide_to_long.R create mode 100644 R/mkinerrmin.R create mode 100644 R/mkinplot.R (limited to 'R') diff --git a/R/mkin_long_to_wide.R b/R/mkin_long_to_wide.R new file mode 100644 index 0000000..45ee0a1 --- /dev/null +++ b/R/mkin_long_to_wide.R @@ -0,0 +1,9 @@ +mkin_long_to_wide <- function(long_data, time = "time") +{ + colnames <- unique(long_data$name) + wide_data <- data.frame(time = subset(long_data, name == colnames[1], time)) + for (var in colnames) { + wide_data[var] <- subset(long_data, name == var, value) + } + return(wide_data) +} diff --git a/R/mkin_wide_to_long.R b/R/mkin_wide_to_long.R new file mode 100644 index 0000000..6db2f33 --- /dev/null +++ b/R/mkin_wide_to_long.R @@ -0,0 +1,13 @@ +mkin_wide_to_long <- function(wide_data, time = "t") +{ + colnames <- names(wide_data) + vars <- subset(colnames, colnames != time) + n <- length(colnames) - 1 + if (!(time %in% colnames)) stop("The data in wide format have to contain a variable named ", time, ".") + long_data <- data.frame( + name = rep(vars, each = length(wide_data[[time]])), + time = rep(wide_data[[time]], n), + value = unlist(wide_data[vars]), + row.names = NULL) + return(long_data) +} diff --git a/R/mkinerrmin.R b/R/mkinerrmin.R new file mode 100644 index 0000000..7a922eb --- /dev/null +++ b/R/mkinerrmin.R @@ -0,0 +1,14 @@ +mkinerrmin <- function(errdata, n.parms, alpha = 0.05) +{ + means.mean <- mean(errdata$value_mean, na.rm=TRUE) + + df = length(errdata$value_mean) - n.parms + + f <- function(err) + { + (sum((errdata$value_mean - errdata$value_pred)^2/((err * means.mean)^2)) - + qchisq(1 - alpha,df))^2 + } + err.min <- optimize(f, c(0.01,0.9))$minimum + return(list(err.min = err.min, n.optim = n.parms, df = df)) +} diff --git a/R/mkinfit.R b/R/mkinfit.R index 9651fd6..c149027 100644 --- a/R/mkinfit.R +++ b/R/mkinfit.R @@ -1,55 +1,78 @@ -mkinfit <- function(mkinmod, observed, +mkinfit <- function(mkinmod, observed, parms.ini = rep(0.1, length(mkinmod$parms)), state.ini = c(100, rep(0, length(mkinmod$diffs) - 1)), - fixed_parms = rep(FALSE, length(mkinmod$parms)), - fixed_initials = c(FALSE, rep(TRUE, length(mkinmod$diffs) - 1)), - plot = NULL, + fixed_parms = NULL, + fixed_initials = names(mkinmod$diffs)[-1], + plot = FALSE, quiet = FALSE, err = NULL, weight = "none", scaleVar = FALSE, ...) { + mod_vars <- names(mkinmod$diffs) + # Subset dataframe with mapped (modelled) variables + observed <- subset(observed, name %in% names(mkinmod$map)) + # Get names of observed variables + obs_vars = unique(as.character(observed$name)) + # Name the parameters if they are not named yet if(is.null(names(parms.ini))) names(parms.ini) <- mkinmod$parms # Create a function calculating the differentials specified by the model mkindiff <- function(t, state, parms) { + time <- t diffs <- vector() - for (box in names(mkinmod$diffs)) + for (box in mod_vars) { diffname <- paste("d", box, sep="_") - diffs[diffname] <- with(as.list(c(state, parms)), + diffs[diffname] <- with(as.list(c(time,state, parms)), eval(parse(text=mkinmod$diffs[[box]]))) } return(list(c(diffs))) } # Name the inital parameter values if they are not named yet - if(is.null(names(state.ini))) names(state.ini) <- names(mkinmod$diffs) + if(is.null(names(state.ini))) names(state.ini) <- mod_vars - # TODO: Collect parameters to be optimised - parms.optim <- parms.ini[!fixed_parms] + # Parameters to be optimised parms.fixed <- parms.ini[fixed_parms] + optim_parms <- setdiff(names(parms.ini), fixed_parms) + parms.optim <- parms.ini[optim_parms] - state.ini.optim <- state.ini[!fixed_initials] - state.ini.optim.boxnames <- names(state.ini.optim) - names(state.ini.optim) <- paste(names(state.ini.optim), "0", sep="_") state.ini.fixed <- state.ini[fixed_initials] + optim_initials <- setdiff(names(state.ini), fixed_initials) + state.ini.optim <- state.ini[optim_initials] + state.ini.optim.boxnames <- names(state.ini.optim) + if(length(state.ini.optim) > 0) { + names(state.ini.optim) <- paste(names(state.ini.optim), "0", sep="_") + } + cost.old <- 1e100 + calls <- 0 + out_predicted <- NA # Define the model cost function cost <- function(P) { + assign("calls", calls+1, inherits=TRUE) if(length(state.ini.optim) > 0) { odeini <- c(P[1:length(state.ini.optim)], state.ini.fixed) names(odeini) <- c(state.ini.optim.boxnames, names(state.ini.fixed)) } else odeini <- state.ini.fixed odeparms <- c(P[(length(state.ini.optim) + 1):length(P)], parms.fixed) - # Solve the ODE + + # Get more timepoints if plotting is desired + if(plot) { + outtimes = unique(sort(c(observed$time, + seq(min(observed$time), max(observed$time), length.out=100)))) + } else outtimes = unique(observed$time) + + # Solve the ode out <- ode( y = odeini, - times = unique(observed$time), + times = outtimes, func = mkindiff, parms = odeparms) - # Output transformation for models with ghost compartments like SFORB + + # Output transformation for models with unobserved compartments like SFORB out_transformed <- data.frame(time = out[,"time"]) for (var in names(mkinmod$map)) { if(length(mkinmod$map[[var]]) == 1) { @@ -58,9 +81,190 @@ mkinfit <- function(mkinmod, observed, out_transformed[var] <- rowSums(out[, mkinmod$map[[var]]]) } } + assign("out_predicted", subset(out_transformed, time %in% observed$time), inherits=TRUE) + + mC <- modCost(out_transformed, observed, y = "value", + err = err, weight = weight, scaleVar = scaleVar) + + # Report and/or plot if the model is improved + if (mC$model < cost.old) { + if(!quiet) cat("Model cost at call ", calls, ": ", mC$model, "\n") + + # Plot the data and the current model output if requested + if(plot) { + plot(0, type="n", + xlim = range(observed$time), ylim = range(observed$value, na.rm=TRUE), + xlab = "Time", ylab = "Observed") + col_obs <- pch_obs <- 1:length(obs_vars) + names(col_obs) <- names(pch_obs) <- obs_vars + for (obs_var in obs_vars) { + points(subset(observed, name == obs_var, c(time, value)), + pch = pch_obs[obs_var], col = col_obs[obs_var]) + } + matlines(out_transformed$time, out_transformed[-1]) + legend("topright", inset=c(0.05, 0.05), legend=obs_vars, + col=col_obs, pch=pch_obs, lty=1:length(pch_obs)) + } - return(modCost(out_transformed, observed, y = "value", - err = err, weight = weight, scaleVar = scaleVar)) + assign("cost.old", mC$model, inherits=TRUE) + } + return(mC) } - modFit(cost, c(state.ini.optim, parms.optim), ...) + fit <- modFit(cost, c(state.ini.optim, parms.optim), ...) + + # We need the function for plotting + fit$mkindiff <- mkindiff + + # We also need various other information for summary and plotting + fit$map <- mkinmod$map + fit$diffs <- mkinmod$diffs + fit$observed <- mkin_long_to_wide(observed) + predicted_long <- mkin_wide_to_long(out_predicted, time = "time") + fit$predicted <- out_predicted + + # Collect initial parameter values in two dataframes + fit$start <- data.frame(initial = c(state.ini.optim, parms.optim)) + fit$start$type = c(rep("state", length(state.ini.optim)), rep("deparm", length(parms.optim))) + if (exists("lower")) fit$start$lower <- lower + if (exists("upper")) fit$start$upper <- upper + + fit$fixed <- data.frame( + value = c(state.ini.fixed, parms.fixed)) + fit$fixed$type = c(rep("state", length(state.ini.fixed)), rep("deparm", length(parms.fixed))) + + # Calculate chi2 error levels according to FOCUS (2006) + means <- aggregate(value ~ time + name, data = observed, mean, na.rm=TRUE) + errdata <- merge(means, predicted_long, by = c("time", "name"), suffixes = c("_mean", "_pred")) + errdata <- errdata[order(errdata$time, errdata$name), ] + errmin.overall <- mkinerrmin(errdata, length(parms.optim) + length(state.ini.optim)) + + errmin <- data.frame(err.min = errmin.overall$err.min, + n.optim = errmin.overall$n.optim, df = errmin.overall$df) + rownames(errmin) <- "All data" + for (obs_var in obs_vars) + { + errdata.var <- subset(errdata, name == obs_var) + n.k.optim <- length(grep(paste("k", obs_var, sep="_"), names(parms.optim))) + n.initials.optim <- length(grep(paste(obs_var, ".*", "_0", sep=""), names(state.ini.optim))) + n.optim <- n.k.optim + n.initials.optim + if ("alpha" %in% names(parms.optim)) n.optim <- n.optim + 1 + if ("beta" %in% names(parms.optim)) n.optim <- n.optim + 1 + errmin.tmp <- mkinerrmin(errdata.var, n.optim) + errmin[obs_var, c("err.min", "n.optim", "df")] <- errmin.tmp + } + fit$errmin <- errmin + + # Calculate dissipation times DT50 and DT90 + parms.all = c(fit$par, parms.fixed) + fit$distimes <- data.frame(DT50 = rep(NA, length(obs_vars)), DT90 = rep(NA, length(obs_vars)), + row.names = obs_vars) + for (obs_var in obs_vars) { + type = names(mkinmod$map[[obs_var]])[1] + if (type == "SFO") { + k_names = grep(paste("k", obs_var, sep="_"), names(parms.all), value=TRUE) + k_tot = sum(parms.all[k_names]) + DT50 = log(2)/k_tot + DT90 = log(10)/k_tot + } + if (type == "FOMC") { + alpha = parms.all["alpha"] + beta = parms.all["beta"] + DT50 = beta * (2^(1/alpha) - 1) + DT90 = beta * (10^(1/alpha) - 1) + } + if (type == "SFORB") { + # FOCUS kinetics (2006), p. 60 f + k_out_names = grep(paste("k", obs_var, "free", sep="_"), names(parms.all), value=TRUE) + k_out_names = setdiff(k_out_names, paste("k", obs_var, "free", "bound", sep="_")) + k_1output = sum(parms.all[[k_out_names]]) + k_12 = parms.all[[paste("k", obs_var, "free", "bound", sep="_")]] + k_21 = parms.all[[paste("k", obs_var, "bound", "free", sep="_")]] + + sqrt_exp = sqrt(1/4 * (k_12 + k_21 + k_1output)^2 + k_12 * k_21 - (k_12 + k_1output) * k_21) + b1 = 0.5 * (k_12 + k_21 + k_1output) + sqrt_exp + b2 = 0.5 * (k_12 + k_21 + k_1output) - sqrt_exp + + SFORB_fraction = function(t) { + ((k_12 + k_21 - b1)/(b2 - b1)) * exp(-b1 * t) + + ((k_12 + k_21 - b2)/(b1 - b2)) * exp(-b2 * t) + } + f_50 <- function(t) (SFORB_fraction(t) - 0.5)^2 + max_DT <- 1000 + DT50.o <- optimize(f_50, c(0.01, max_DT))$minimum + if (abs(DT50.o - max_DT) < 0.01) DT50 = NA else DT50 = DT50.o + f_90 <- function(t) (SFORB_fraction(t) - 0.1)^2 + DT90.o <- optimize(f_90, c(0.01, 1000))$minimum + if (abs(DT90.o - max_DT) < 0.01) DT90 = NA else DT90 = DT90.o + } + fit$distimes[obs_var, ] = c(DT50, DT90) + } + + # Collect observed, predicted and residuals + data <- merge(observed, predicted_long, by = c("time", "name")) + names(data) <- c("time", "variable", "observed", "predicted") + data$residual <- data$observed - data$predicted + data$variable <- ordered(data$variable, levels = obs_vars) + fit$data <- data[order(data$variable, data$time), ] + + class(fit) <- c("mkinfit", "modFit") + return(fit) +} + +summary.mkinfit <- function(object, data = TRUE, distimes = TRUE, cov = FALSE,...) { + ans <- FME:::summary.modFit(object, cov = cov) + ans$diffs <- object$diffs + if(data) ans$data <- object$data + ans$start <- object$start + ans$fixed <- object$fixed + ans$errmin <- object$errmin + if(distimes) ans$distimes <- object$distimes + class(ans) <- c("summary.mkinfit", "summary.modFit") + return(ans) +} + +# Expanded from print.summary.modFit +print.summary.mkinfit <- function(x, digits = max(3, getOption("digits") - 3), ...) { + cat("\nEquations:\n") + print(noquote(as.character(x[["diffs"]]))) + df <- x$df + rdf <- df[2] + + cat("\nStarting values for optimised parameters:\n") + print(x$start) + + cat("\nFixed parameter values:\n") + if(length(x$fixed$value) == 0) cat("None\n") + else print(x$fixed) + + cat("\nOptimised parameters:\n") + printCoefmat(x$par, digits = digits, ...) + + cat("\nResidual standard error:", + format(signif(x$sigma, digits)), "on", rdf, "degrees of freedom\n") + + cat("\nChi2 error levels in percent:\n") + x$errmin$err.min <- 100 * x$errmin$err.min + print(x$errmin, digits=digits,...) + + printdistimes <- !is.null(x$distimes) + if(printdistimes){ + cat("\nEstimated disappearance times\n") + print(x$distimes, digits=digits,...) + } + + printcor <- !is.null(x$cov.unscaled) + if (printcor){ + Corr <- cov2cor(x$cov.unscaled) + rownames(Corr) <- colnames(Corr) <- rownames(x$par) + cat("\nParameter correlation:\n") + print(Corr, digits = digits, ...) + } + + printdata <- !is.null(x$data) + if (printdata){ + cat("\nData:\n") + print(format(x$data, digits = digits, scientific = FALSE,...), row.names = FALSE) + } + + invisible(x) } diff --git a/R/mkinmod.R b/R/mkinmod.R index 7e64f87..643e855 100644 --- a/R/mkinmod.R +++ b/R/mkinmod.R @@ -1,38 +1,53 @@ -mkinmod <- function(spec = list(parent = list(type = "SFO", to = NA, sink = TRUE))) +mkinmod <- function(...) { - if(!is.list(spec)) stop("spec must be a list of model specifications for each observed variable") + spec <- list(...) + obs_vars <- names(spec) - # The returned model will be a list of two character vectors, containing - # differential equations and parameter names + # The returned model will be a list of character vectors, containing + # differential equations, parameter names and a mapping from model variables + # to observed variables parms <- vector() diffs <- vector() map <- list() # Establish list of differential equations - for (varname in names(spec)) + for (varname in obs_vars) { - if(!spec[[varname]]$type %in% c("SFO", "SFORB")) stop("Available types are SFO and SFORB only") + if(is.null(spec[[varname]]$type)) stop("Every argument to mkinmod must be a list containing a type component") + if(!spec[[varname]]$type %in% c("SFO", "FOMC", "SFORB")) stop("Available types are SFO, FOMC and SFORB only") new_parms <- vector() # New (sub)compartments (boxes) needed for the model type new_boxes <- switch(spec[[varname]]$type, SFO = varname, + FOMC = varname, SFORB = paste(varname, c("free", "bound"), sep="_") ) map[[varname]] <- new_boxes + names(map[[varname]]) <- rep(spec[[varname]]$type, length(new_boxes)) # Start a new differential equation for each new box new_diffs <- paste("d_", new_boxes, " =", sep="") # Construct terms for transfer to sink and add if appropriate + if(is.null(spec[[varname]]$sink)) spec[[varname]]$sink <- TRUE if(spec[[varname]]$sink) { - k_compound_sink <- paste("k", new_boxes[[1]], "sink", sep="_") - sink_term <- paste("-", k_compound_sink, "*", new_boxes[[1]]) - # Only add sink term to first (or only) box for SFO and SFORB models + # Add first-order sink term to first (or only) box for SFO and SFORB models if(spec[[varname]]$type %in% c("SFO", "SFORB")) { + k_compound_sink <- paste("k", new_boxes[[1]], "sink", sep="_") + sink_term <- paste("-", k_compound_sink, "*", new_boxes[[1]]) new_diffs[[1]] <- paste(new_diffs[[1]], sink_term) + new_parms <- k_compound_sink + } + if(spec[[varname]]$type == "FOMC") { + if(match(varname, obs_vars) != 1) { + stop("Type FOMC is only allowed for the first compartment, which is assumed to be the source compartment") + } + # From p. 53 of the FOCUS kinetics report + fomc_term <- paste("(alpha/beta) * ((time/beta) + 1)^-1 *", new_boxes[[1]]) + new_diffs[[1]] <- paste(new_diffs[[1]], "-", fomc_term) + new_parms <- c("alpha", "beta") } - new_parms <- k_compound_sink } # Add reversible binding if appropriate @@ -53,24 +68,41 @@ mkinmod <- function(spec = list(parent = list(type = "SFO", to = NA, sink = TRUE } # Transfer between compartments - for (varname in names(spec)) { + for (varname in obs_vars) { to <- spec[[varname]]$to - if(!is.na(to)) { + if(!is.null(to)) { origin_box <- switch(spec[[varname]]$type, SFO = varname, + FOMC = varname, SFORB = paste(varname, "free", sep="_")) + fraction_left <- NULL for (target in to) { target_box <- switch(spec[[target]]$type, SFO = target, SFORB = paste(target, "free", sep="_")) + if(spec[[varname]]$type %in% c("SFO", "SFORB")) { + k_from_to <- paste("k", origin_box, target_box, sep="_") + diffs[[origin_box]] <- paste(diffs[[origin_box]], "-", k_from_to, "*", origin_box) + diffs[[target_box]] <- paste(diffs[[target_box]], "+", k_from_to, "*", origin_box) + parms <- c(parms, k_from_to) + } + if(spec[[varname]]$type == "FOMC") { + fraction_to_target = paste("f_to", target, sep="_") + fraction_not_to_target = paste("(1 - ", fraction_to_target, ")", sep="") + if(is.null(fraction_left)) { + fraction_really_to_target = fraction_to_target + fraction_left = fraction_not_to_target + } else { + fraction_really_to_target = paste(fraction_left, " * ", fraction_to_target, sep="") + fraction_left = paste(fraction_left, " * ", fraction_not_to_target, sep="") + } + diffs[[target_box]] <- paste(diffs[[target_box]], "+", fraction_really_to_target, "*", fomc_term) + parms <- c(parms, fraction_to_target) + } } - k_from_to <- paste("k", origin_box, target_box, sep="_") - diffs[[origin_box]] <- paste(diffs[[origin_box]], "-", k_from_to, "*", origin_box) - diffs[[target_box]] <- paste(diffs[[target_box]], "+", k_from_to, "*", origin_box) - parms <- c(parms, k_from_to) } } model <- list(diffs = diffs, parms = parms, map = map) class(model) <- "mkinmod" - return(model) + invisible(model) } diff --git a/R/mkinplot.R b/R/mkinplot.R new file mode 100644 index 0000000..d2880c0 --- /dev/null +++ b/R/mkinplot.R @@ -0,0 +1,49 @@ +mkinplot <- function(fit, xlab = "Time", ylab = "Observed", xlim = range(fit$data$time), ylim = range(fit$data$observed, na.rm = TRUE), ...) +{ + fixed <- fit$fixed$value + names(fixed) <- rownames(fit$fixed) + parms.all <- c(fit$par, fixed) + ininames <- c( + rownames(subset(fit$start, type == "state")), + rownames(subset(fit$fixed, type == "state"))) + odeini <- parms.all[ininames] + names(odeini) <- names(fit$diffs) + + outtimes <- seq(xlim[1], xlim[2], length.out=100) + + odenames <- c( + rownames(subset(fit$start, type == "deparm")), + rownames(subset(fit$fixed, type == "deparm"))) + odeparms <- parms.all[odenames] + + # Solve the ode + out <- ode( + y = odeini, + times = outtimes, + func = fit$mkindiff, + parms = odeparms) + + # Output transformation for models with unobserved compartments like SFORB + out_transformed <- data.frame(time = out[,"time"]) + for (var in names(fit$map)) { + if(length(fit$map[[var]]) == 1) { + out_transformed[var] <- out[, var] + } else { + out_transformed[var] <- rowSums(out[, fit$map[[var]]]) + } + } + + # Plot the data and model output + plot(0, type="n", + xlim = xlim, ylim = ylim, + xlab = xlab, ylab = ylab, ...) + col_obs <- pch_obs <- 1:length(fit$map) + names(col_obs) <- names(pch_obs) <- names(fit$map) + for (obs_var in names(fit$map)) { + points(subset(fit$data, variable == obs_var, c(time, observed)), + pch = pch_obs[obs_var], col = col_obs[obs_var]) + } + matlines(out_transformed$time, out_transformed[-1]) + legend("topright", inset=c(0.05, 0.05), legend=names(fit$map), + col=col_obs, pch=pch_obs, lty=1:length(pch_obs)) +} -- cgit v1.2.1