drdata <- function(substances, experimentator = "%", db = "cytotox",
celltype="IPC-81",enzymetype="AChE",
organism="Vibrio fischeri",endpoint="Luminescence",whereClause="1",
ok="'ok','no fit'")
{
library(RODBC)
channel <- odbcConnect(db,uid="cytotox",pwd="cytotox",case="tolower")
slist <- paste(substances,collapse="','")
if (db == "cytotox") {
responsetype <- "viability"
testtype <- "celltype"
type <- celltype
} else {
if (db == "enzymes") {
responsetype <- "activity"
testtype <- "enzyme"
type <- enzymetype
} else {
responsetype <- "response"
testtype <- "organism"
type <- organism
}
}
query <- paste("SELECT conc,",responsetype,",unit,experimentator,substance,",testtype,
",ok FROM ", db, " WHERE substance IN ('",
slist,"') AND experimentator LIKE '",
experimentator,"' AND ",testtype," LIKE '",
type,"' AND ",
whereClause," AND ok in (",
ok,")",sep="")
if (db == "ecotox") query <- paste(query," AND type LIKE '",endpoint,"'",sep="")
data <- sqlQuery(channel,query)
odbcClose(channel)
names(data)[[1]] <- "dose"
names(data)[[2]] <- "response"
data$substance <- factor(data$substance,levels=substances)
return(data)
}
linlogitf <- function(x,k,f,mu,b)
{
k*(1 + f*x) / (1 + ((2*f*(10^mu) + 1) * ((x/(10^mu))^b)))
}
drfit <- function(data, startlogED50 = NA, chooseone=TRUE,
probit = TRUE, logit = FALSE, weibull = FALSE,
linlogit = FALSE, linlogitWrong = NA, allWrong = NA,
s0 = 0.5, b0 = 2, f0 = 0)
{
if(!is.null(data$ok)) data <- subset(data,ok!="no fit") # Don't use data with
# ok set to "no fit"
substances <- levels(data$substance)
ri <- rix <- 0 # ri is the index over the result rows
# rix is used later to check if any
# model result was appended
rsubstance <- array() # the substance names in the results
rndl <- vector() # number of dose levels
rn <- vector() # mean number of replicates
# in each dose level
runit <- vector() # vector of units for each result row
rlhd <- rlld <- vector() # highest and lowest doses tested
mtype <- array() # the modeltypes
sigma <- array() # the standard deviation of the residuals
logED50 <- vector()
stderrlogED50 <- vector()
a <- b <- c <- vector()
splitted <- split(data,data$substance)
for (i in substances) {
tmp <- splitted[[i]]
fit <- FALSE
if (length(tmp) != 0) {
unit <- levels(as.factor(as.vector(tmp$unit)))
cat("\n",i,": Fitting data...\n",sep="")
} else {
unit <- ""
cat("\n",i,": No data\n",sep="")
}
if (length(unit) > 1) {
cat("More than one unit for substance ",i,", halting\n\n",sep="")
break
}
if (length(tmp$response) == 0) {
nodata = TRUE
} else {
nodata = FALSE
}
rix <- ri
if (nodata) {
n <- ndl <- 0
} else {
ndl <- length(levels(factor(tmp$dose)))
n <- round(length(tmp$response)/ndl)
if (is.na(startlogED50[i])){
w <- 1/abs(tmp$response - 0.3)
startlogED50[[i]] <- sum(w * log10(tmp$dose))/sum(w)
}
highestdose <- max(tmp$dose)
lowestdose <- min(tmp$dose)
lhd <- log10(highestdose)
lld <- log10(lowestdose)
responseathighestdose <- mean(subset(tmp,dose==highestdose)$response)
responseatlowestdose <- mean(subset(tmp,dose==lowestdose)$response)
if (responseathighestdose < 0.5) {
inactive <- FALSE
if (responseatlowestdose < 0.5) {
active <- TRUE
} else {
active <- FALSE
if (linlogit &&
length(subset(linlogitWrong,linlogitWrong == i))==0 &&
length(subset(allWrong,allWrong == i))==0) {
m <- try(nls(response ~ linlogitf(dose,1,f,logED50,b),
data=tmp,
start=list(f=f0,logED50=startlogED50[[i]],b=b0)))
if (!inherits(m, "try-error")) {
fit <- TRUE
ri <- ri + 1
s <- summary(m)
sigma[[ri]] <- s$sigma
rsubstance[[ri]] <- i
rndl[[ri]] <- ndl
rn[[ri]] <- n
runit[[ri]] <- unit
rlld[[ri]] <- log10(lowestdose)
rlhd[[ri]] <- log10(highestdose)
logED50[[ri]] <- coef(m)[["logED50"]]
if (logED50[[ri]] > rlhd[[ri]]) {
mtype[[ri]] <- "no fit"
logED50[[ri]] <- NA
stderrlogED50[[ri]] <- NA
a[[ri]] <- NA
b[[ri]] <- NA
c[[ri]] <- NA
} else {
mtype[[ri]] <- "linlogit"
stderrlogED50[[ri]] <- s$parameters["logED50","Std. Error"]
a[[ri]] <- coef(m)[["logED50"]]
b[[ri]] <- coef(m)[["b"]]
c[[ri]] <- coef(m)[["f"]]
}
}
}
if (probit &&
length(subset(allWrong,allWrong == i))==0) {
m <- try(nls(response ~ pnorm(-log10(dose),-logED50,scale),
data=tmp,
start=list(logED50=startlogED50[[i]],scale=1)))
if (chooseone==FALSE || fit==FALSE) {
if (!inherits(m, "try-error")) {
fit <- TRUE
ri <- ri + 1
s <- summary(m)
sigma[[ri]] <- s$sigma
rsubstance[[ri]] <- i
rndl[[ri]] <- ndl
rn[[ri]] <- n
runit[[ri]] <- unit
rlld[[ri]] <- log10(lowestdose)
rlhd[[ri]] <- log10(highestdose)
logED50[[ri]] <- coef(m)[["logED50"]]
c[[ri]] <- NA
if (logED50[[ri]] > rlhd[[ri]]) {
mtype[[ri]] <- "no fit"
logED50[[ri]] <- NA
stderrlogED50[[ri]] <- NA
a[[ri]] <- NA
b[[ri]] <- NA
} else {
mtype[[ri]] <- "probit"
stderrlogED50[[ri]] <- s$parameters["logED50","Std. Error"]
a[[ri]] <- coef(m)[["logED50"]]
b[[ri]] <- coef(m)[["scale"]]
}
}
}
}
if (logit &&
length(subset(allWrong,allWrong == i))==0) {
m <- try(nls(response ~ plogis(-log10(dose),-logED50,scale),
data=tmp,
start=list(logED50=startlogED50[[i]],scale=1)))
if (chooseone==FALSE || fit==FALSE) {
if (!inherits(m, "try-error")) {
fit <- TRUE
ri <- ri + 1
s <- summary(m)
sigma[[ri]] <- s$sigma
rsubstance[[ri]] <- i
rndl[[ri]] <- ndl
rn[[ri]] <- n
runit[[ri]] <- unit
rlld[[ri]] <- log10(lowestdose)
rlhd[[ri]] <- log10(highestdose)
logED50[[ri]] <- a[[ri]] <- coef(m)[["logED50"]]
b[[ri]] <- coef(m)[["scale"]]
c[[ri]] <- NA
if (logED50[[ri]] > rlhd[[ri]]) {
mtype[[ri]] <- "no fit"
logED50[[ri]] <- NA
stderrlogED50[[ri]] <- NA
a[[ri]] <- NA
b[[ri]] <- NA
} else {
mtype[[ri]] <- "logit"
stderrlogED50[[ri]] <- s$parameters["logED50","Std. Error"]
}
}
}
}
if (weibull &&
length(subset(allWrong,allWrong == i))==0) {
m <- try(nls(response ~ pweibull(-log10(dose)+location,shape),
data=tmp,
start=list(location=startlogED50[[i]],shape=s0)))
if (chooseone==FALSE || fit==FALSE) {
if (!inherits(m, "try-error")) {
fit <- TRUE
ri <- ri + 1
s <- summary(m)
sigma[[ri]] <- s$sigma
rsubstance[[ri]] <- i
rndl[[ri]] <- ndl
rn[[ri]] <- n
runit[[ri]] <- unit
rlld[[ri]] <- log10(lowestdose)
rlhd[[ri]] <- log10(highestdose)
a[[ri]] <- coef(m)[["location"]]
b[[ri]] <- coef(m)[["shape"]]
sqrdev <- function(logdose) {
(0.5 - pweibull( - logdose + a[[ri]], b[[ri]]))^2
}
logED50[[ri]] <- nlm(sqrdev,startlogED50[[i]])$estimate
c[[ri]] <- NA
if (logED50[[ri]] > rlhd[[ri]]) {
mtype[[ri]] <- "no fit"
logED50[[ri]] <- NA
stderrlogED50[[ri]] <- NA
a[[ri]] <- NA
b[[ri]] <- NA
} else {
mtype[[ri]] <- "weibull"
stderrlogED50[[ri]] <- NA
}
}
}
}
}
} else {
inactive <- TRUE
}
}
if (ri == rix) { # if no entry was appended for this substance
ri <- ri + 1
rsubstance[[ri]] <- i
rndl[[ri]] <- ndl
rn[[ri]] <- n
if (nodata) {
rlld[[ri]] <- rlhd[[i]] <- NA
mtype[[ri]] <- "no data"
runit[[ri]] <- NA
} else {
rlld[[ri]] <- log10(lowestdose)
rlhd[[i]] <- log10(highestdose)
runit[[ri]] <- unit
if (inactive) {
mtype[[ri]] <- "inactive"
} else {
if (active) {
mtype[[ri]] <- "active"
} else {
mtype[[ri]] <- "no fit"
}
}
}
sigma[[ri]] <- NA
logED50[[ri]] <- NA
stderrlogED50[[ri]] <- NA
a[[ri]] <- NA
b[[ri]] <- NA
c[[ri]] <- NA
}
}
results <- data.frame(rsubstance, rndl, rn, rlld, rlhd, mtype, logED50, stderrlogED50, runit, sigma, a, b)
names(results) <- c("Substance","ndl","n","lld","lhd","mtype","logED50","std","unit","sigma","a","b")
if (linlogit) {
results$c <- c
}
rownames(results) <- 1:ri
return(results)
}
drplot <- function(drresults, data,
dtype = "std", alpha = 0.95, ctype = "none",
path = "./", fileprefix = "drplot", overlay = FALSE,
postscript = FALSE, png = FALSE, pdf = FALSE,
bw = TRUE,
pointsize = 12,
colors = 1:8, devoff=T, lpos="topright")
{
# Check if all data have the same unit
unitlevels <- levels(as.factor(drresults$unit))
if (length(unitlevels) == 1) {
unit <- unitlevels
} else {
unit <- "different units"
}
# Get the plot limits on the x-axis (log of the dose) and y axis
if(is.data.frame(data)) {
nonzerodata <- subset(data,dose!=0)
nonzerodata$substance <- factor(nonzerodata$substance) # Get rid of pseudo substance names of controls
zerodata <- subset(data,dose==0)
nc <- length(zerodata$dose) # Number of control points
if (nc > 0) {
sdc <- sd(zerodata$response) # Standard deviation of control responses
controlconf <- sdc * qt((1 + alpha)/2, nc - 1) / sqrt(nc)
cat("There are ",nc,"data points with dose 0 (control values)\n")
cat("with a standard deviation of",sdc,"\n")
cat("and a confidence interval of",controlconf,"\n")
if (nc < 3) {
cat("\nThere are less than 3 control points, therefore their scatter\n")
cat("will not be displayed\n")
ctype = "none"
}
}
lld <- log10(min(nonzerodata$dose))
lhd <- log10(max(nonzerodata$dose))
hr <- max(nonzerodata$response)
if (ctype == "std") hr <- max(hr,1 + sdc)
if (ctype == "conf") hr <- max(hr,1 + controlconf)
dsubstances <- levels(nonzerodata$substance)
} else {
lld <- min(drresults[["logED50"]],na.rm=TRUE) - 2
lhd <- max(drresults[["logED50"]],na.rm=TRUE) + 2
if (length(subset(drresults,mtype=="linlogit")$Substance) != 0) {
hr <- 1.8
} else {
hr <- 1.0
}
}
# Prepare overlay plot if requested
if (overlay)
{
if (postscript) {
filename = paste(path,fileprefix,".eps",sep="")
postscript(file=filename,
paper="special",width=7,height=7,horizontal=FALSE, pointsize=pointsize)
cat("Created File: ",filename,"\n")
}
if (pdf) {
filename = paste(path,fileprefix,".pdf",sep="")
pdf(file=filename,
paper="special",width=7,height=7,horizontal=FALSE, pointsize=pointsize)
cat("Created File: ",filename,"\n")
}
if (png) {
filename = paste(path,fileprefix,".png",sep="")
png(filename=filename,
width=500, height=500, pointsize=pointsize)
cat("Created File: ",filename,"\n")
}
if (!postscript && !png && !pdf) {
get(getOption("device"))(width=7,height=7)
}
plot(0,type="n",
xlim=c(lld - 0.5, lhd + 1),
ylim= c(-0.1, hr + 0.2),
xlab=paste("Decadic Logarithm of the dose in ", unit),
ylab="Normalized response")
}
# Plot the data either as raw data or as error bars
if(is.data.frame(data)) {
splitted <- split(nonzerodata,nonzerodata$substance)
# n is the index for the dose-response curves
n <- 0
if (bw) colors <- rep("black",length(dsubstances))
# Loop over the substances in the data
for (i in dsubstances) {
n <- n + 1
tmp <- splitted[[i]]
if (length(tmp$response) != 0) {
color <- colors[[n]]
# Prepare the single graphs if an overlay is not requested
if (!overlay)
{
if (postscript) {
filename = paste(path,fileprefix,sub(" ","_",i),".eps",sep="")
postscript(file=filename,
paper="special",width=7,height=7,horizontal=FALSE,pointsize=pointsize)
cat("Created File: ",filename,"\n")
}
if (pdf) {
filename = paste(path,fileprefix,sub(" ","_",i),".pdf",sep="")
pdf(file=filename,
paper="special",width=7,height=7,horizontal=FALSE,pointsize=pointsize)
cat("Created File: ",filename,"\n")
}
if (png) {
filename = paste(path,fileprefix,sub(" ","_",i),".png",sep="")
png(filename=filename,
width=500, height=500, pointsize=pointsize)
cat("Created File: ",filename,"\n")
}
if (!postscript && !png && !pdf) {
get(getOption("device"))(width=7,height=7)
}
plot(0,type="n",
xlim=c(lld - 0.5, lhd + 2),
ylim= c(-0.1, hr + 0.2),
xlab=paste("Decadic Logarithm of the dose in ", unit),
ylab="Normalized response")
}
if (!overlay) legend(lpos, i,lty = 1, col = color, inset=0.05)
tmp$dosefactor <- factor(tmp$dose) # necessary because the old
# factor has all levels, not
# only the ones tested with
# this substance
# Plot the control lines, if requested
if (ctype == "std") {
abline(h = 1 - sdc, lty = 2)
abline(h = 1 + sdc, lty = 2)
}
if (ctype == "conf") {
abline(h = 1 - controlconf, lty = 2)
abline(h = 1 + controlconf, lty = 2)
}
# Plot the data, if requested
if (dtype != "none") {
if (dtype == "raw") {
points(log10(tmp$dose),tmp$response,col=color)
} else {
splitresponses <- split(tmp$response,tmp$dosefactor)
means <- sapply(splitresponses,mean)
lengths <- sapply(splitresponses,length)
vars <- sapply(splitresponses,var)
standarddeviations <- sqrt(vars)
}
if (dtype == "std")
{
tops <- means + standarddeviations
bottoms <- means - standarddeviations
}
if (dtype == "conf")
{
confidencedeltas <- qt((1 + alpha)/2, lengths - 1) * sqrt(vars)
tops <- means + confidencedeltas
bottoms <- means - confidencedeltas
}
if (dtype != "raw")
{
x <- log10(as.numeric(levels(tmp$dosefactor)))
segments(x,bottoms,x,tops,col=color)
points(x,means,col=color)
smidge <- 0.05
segments(x - smidge,bottoms,x + smidge,bottoms,col=color)
segments(x - smidge,tops,x + smidge,tops,col=color)
}
}
# Plot the fits, if there are any
fits <- subset(drresults,Substance == i)
nf <- length(fits$Substance) # number of fits to plot
if (nf > 0) {
for (j in 1:nf)
{
logED50 <- fits[j,"logED50"]
mtype <- as.character(fits[j, "mtype"])
if (mtype == "probit") {
scale <- fits[j,"b"]
plot(function(x) pnorm(-x,-logED50,scale),lld - 0.5, lhd + 2, add=TRUE,col=color)
}
if (mtype == "logit") {
scale <- fits[j,"b"]
plot(function(x) plogis(-x,-logED50,scale),lld - 0.5, lhd + 2, add=TRUE,col=color)
}
if (mtype == "weibull") {
location <- fits[j,"a"]
shape <- fits[j,"b"]
plot(function(x) pweibull(-x+location,shape),lld - 0.5, lhd + 2, add=TRUE,col=color)
}
if (mtype == "linlogit") {
plot(function(x) linlogitf(10^x,1,fits[j,"c"],fits[j,"logED50"],fits[j,"b"]),
lld - 0.5, lhd + 2,
add=TRUE,col=color)
}
}
}
if (!overlay && (postscript || png || pdf)) dev.off()
} else {
cat("No data for ",i,"\n")
}
}
}
if (overlay) legend(lpos, dsubstances,lty = 1, col = colors, inset=0.05)
if (overlay && (postscript || png || pdf)) {
if (devoff) {
dev.off()
}
}
}
checkplate <- function(plate,db="cytotox")
{
library(RODBC)
channel <- odbcConnect(db,uid="cytotox",pwd="cytotox",case="tolower")
if (db == "cytotox") {
responsetype <- "viability"
testtype <- "celltype"
} else {
responsetype <- "activity"
testtype <- "enzyme"
}
platequery <- paste("SELECT experimentator,substance,",testtype,",conc,unit,",responsetype,",performed,ok",
"FROM ",db," WHERE plate=", plate)
controlquery <- paste("SELECT type,response FROM controls WHERE plate=",plate)
platedata <- sqlQuery(channel,platequery)
controldata <- sqlQuery(channel,controlquery)
odbcClose(channel)
if (length(platedata$experimentator) < 1) {
cat("There is no response data for plate ",plate," in database ",db,"\n")
} else {
platedata$experimentator <- factor(platedata$experimentator)
platedata$type <- factor(platedata[[testtype]])
platedata$substance <- factor(platedata$substance)
platedata$unit <- factor(platedata$unit)
platedata$performed <- factor(platedata$performed)
platedata$ok <- factor(platedata$ok)
blinds <- subset(controldata,type=="blind")
controls <- subset(controldata,type=="control")
numberOfBlinds <- length(blinds$response)
numberOfControls <- length(controls$response)
meanOfBlinds <- mean(blinds$response)
meanOfControls <- mean(controls$response)
stdOfBlinds <- sd(blinds$response)
stdOfControls <- sd(controls$response)
percentstdOfcontrols <-stdOfControls *100/meanOfControls
cat("Plate ",plate," from database ",db,"\n",
"\tExperimentator: ",levels(platedata$experimentator),"\n",
"\tType(s): ",levels(platedata$type),"\n",
"\tPerformed on : ",levels(platedata$performed),"\n",
"\tSubstance(s): ",levels(platedata$substance),"\n",
"\tConcentration unit: ",levels(platedata$unit),"\n",
"\tOK: ",levels(platedata$ok),"\n",
"\t\tNumber \tMean \t\tStandard Deviation \t% Standard Deviation \n",
"\tblind\t",numberOfBlinds,"\t",meanOfBlinds,"\t",stdOfBlinds,"\n",
"\tcontrol\t",numberOfControls,"\t",meanOfControls,"\t",stdOfControls,"\t\t",percentstdOfcontrols,"\n")
par(ask=TRUE)
boxplot(blinds$response,controls$response,names=c("blinds","controls"),ylab="Response",main=paste("Plate ",plate))
drdata <- platedata[c(2,4,6)]
drdata$substance <- factor(drdata$substance)
substances <- levels(drdata$substance)
plot(log10(drdata$conc),drdata$viability,
xlim=c(-2.5, 4.5),
ylim= c(-0.1, 2),
xlab=paste("decadic logarithm of the concentration in ",levels(platedata$unit)),
ylab=responsetype)
drdatalist <- split(drdata,drdata$substance)
for (i in 1:length(drdatalist)) {
points(log10(drdatalist[[i]]$conc),drdatalist[[i]][[responsetype]],col=i);
}
legend("topleft",substances, pch=1, col=1:length(substances), inset=0.05)
title(main=paste("Plate ",plate," - ",levels(platedata$experimentator)," - ",levels(platedata$type)))
}
}
checksubstance <- function(substance,db="cytotox",experimentator="%",celltype="%",enzymetype="%",whereClause="1",ok="%")
{
library(RODBC)
channel <- odbcConnect(db,uid="cytotox",pwd="cytotox",case="tolower")
if (db == "cytotox") {
responsetype <- "viability"
testtype <- "celltype"
type <- celltype
} else {
responsetype <- "activity"
testtype <- "enzyme"
type <- enzymetype
}
query <- paste("SELECT experimentator,substance,",testtype,",plate,conc,unit,",responsetype,",ok",
" FROM ",db," WHERE substance LIKE '",
substance,"' AND experimentator LIKE '",
experimentator,"' AND ",testtype," LIKE '",
type,"' AND ",
whereClause," AND ok LIKE '",ok,"'",sep="")
data <- sqlQuery(channel,query)
odbcClose(channel)
data$experimentator <- factor(data$experimentator)
data$substance <- factor(data$substance)
substances <- levels(data$substance)
data$type <- factor(data[[testtype]])
data$plate <- factor(data$plate)
plates <- levels(data$plate)
concentrations <- split(data$conc,data$conc)
concentrations <- as.numeric(names(concentrations))
data$unit <- factor(data$unit)
data$ok <- factor(data$ok)
if (length(plates)>6) {
palette(rainbow(length(plates)))
}
plot(log10(data$conc),data[[responsetype]],
xlim=c(-2.5, 4.5),
ylim= c(-0.1, 2),
xlab=paste("decadic logarithm of the concentration in ",levels(data$unit)),
ylab=responsetype)
platelist <- split(data,data$plate)
for (i in 1:length(platelist)) {
points(log10(platelist[[i]]$conc),platelist[[i]][[responsetype]],col=i);
}
legend("topleft", plates, pch=1, col=1:length(plates), inset=0.05)
title(main=paste(substance," - ",levels(data$experimentator)," - ",levels(data$type)))
cat("Substanz ",substance,"\n",
"\tExperimentator(s):",levels(data$experimentator),"\n",
"\tType(s):\t",levels(data$type),"\n",
"\tSubstance(s):\t",levels(data$substance),"\n",
"\tPlate(s):\t",plates,"\n\n")
}