#
# This file contains R commands that reproduce the analyses 
# presented in Chapter 7
#
# Read data on root-length of plants grown in glyphosate-polluted water
#
glyphosate<-read.table("../data/glyphosate.data",header=F)
names(glyphosate)<-c("concentration","water","rep","rootlength")
#
# Plot root-length against glyphosate concentration, using different plotting
# symbols to differentiate plants grown in distilled or tap water (Figure 7.1)
#
pdf(file="glyphosate_data.pdf",height=6,width=8)
plot(glyphosate$conc,glyphosate$rootlength,type="n",xlab="concentration",ylab="root-length",
   cex.axis=1.5,cex.lab=1.5)
take<-glyphosate$water==1
points(glyphosate$conc[take],glyphosate$rootlength[take],pch=19,cex=1.5)
take<-glyphosate$water==2
points(glyphosate$conc[take],glyphosate$rootlength[take],pch=1,cex=1.5)
dev.off()
#
# Small-scale illustration of linear and non-linear relationships (Figure 7.2)
#
set.seed(2347393)
x.fict<-c(0.5,1.5,1.5,2.5)
y1.fict<-x.fict+1.5*rnorm(4)
y2.fict<-y1.fict
y2.fict[2]<-max(c(y2.fict[2],y2.fict[3]))
y2.fict[3]<-y2.fict[2]+0.2
pdf(file="fictitious.pdf",height=6,width=8)
par(mfrow=c(1,2))
plot(x.fict,y1.fict,xlab="x",ylab="y",pch=19,cex=1.5,cex.axis=1.5,cex.lab=1.5,xlim=c(0,3),ylim=c(-1,5))
plot(x.fict,y2.fict,xlab="x",ylab="y",pch=19,cex=1.5,cex.axis=1.5,cex.lab=1.5,xlim=c(0,3),ylim=c(-1,5))
dev.off()
#
# schematic of the simple linear model (Figure 7.3)
#
set.seed(481689)
x<-0.5*(1:10)
alpha<-3
beta<-4
sigma<-5.0
n<-length(x)
pdf(file="linear_schematic.pdf",height=6,width=8)
par(pty="s")
z<-sigma*rnorm(n)
y<-alpha+beta*x+z
plot(x,y,pch=19,cex=1.5,cex.lab=1.5,cex.axis=1.5,xlim=c(0,5.5),ylim=c(0,25))
lines(c(0,x,5.5),alpha+beta*c(0,x,5.5),lwd=2)
lines(c(1,2),alpha+beta*c(1,1))
lines(c(2,2),alpha+beta*c(1,2))
lines(c(x[8],x[8]),c(y[8]+0.5,alpha+beta*x[8]-0.5))
lines(c(x[8]-0.1,x[8],x[8]+0.1),c(y[8]+1,y[8]+0.5,y[8]+1))
y0<-alpha+beta*x[8]
lines(c(x[8]-0.1,x[8],x[8]+0.1),c(y0-1,y0-0.5,y0-1))
y0<-alpha+beta
lines(c(2.1,2.2,2.3),c(y0+0.5,y0,y0+0.5))
y0<-alpha+2*beta
lines(c(2.1,2.2,2.3),c(y0-0.5,y0,y0-0.5))
lines(c(2.2,2.2),c(alpha+beta,alpha+2*beta))
lines(c(-0.1,0.0,0.1),c(0.75,0.25,0.75))
lines(c(-0.1,0.0,0.1),c(alpha-0.75,alpha-0.25,alpha-0.75))
lines(c(0.0,0.0),c(0.25,alpha-0.255))
text(x=0.5,y=alpha/2,labels="alpha")
text(x=2.5,y=alpha+1.5*beta,labels="beta")
text(x=x[8]+0.2,y=(alpha+beta*x[8]+y[8])/2,labels="z")
lines(c(0,5.5),c(0,0),lty=2)
dev.off()
#
# The Normal probability density funciton with mean 5 and standard 
# deviation 1 (Figure 7.4)
mu<-5
sigma<-1
x<-mu+sigma*0.01*((-400):400)
y<-dnorm(x,mu,sigma)
pdf("Normal-pdf.pdf",height=5,width=6)
plot(x,y,type="l",lwd=2,xlab="x",ylab="pdf",xaxp=c(1,9,8))
lines(c(mu,mu),c(0,0.04),lwd=1.5)
lines(c(mu-0.1,mu,mu+0.1),c(0.015,0,0.015),lwd=1.5)
text(mu,0.065,"mu")
lines(c(4.5,5.5),c(0.2,0.2),lwd=1.5)
lines(c(4.6,4.5,4.6),c(0.207,0.2,0.193),lwd=1.5)
lines(c(5.4,5.5,5.4),c(0.207,0.2,0.193),lwd=1.5)
text(5,0.235,"sigma")
dev.off()
#
# Three small-scale simulations of the simple linear model (Figure 7.5)
#
set.seed(480713)
x<-1:10
alpha<-3
beta<-2
sigma<-2.0
n<-length(x)
pdf(file="linear_sims.pdf",paper="special",width=6,height=4)
par(mfrow=c(1,3),pty="s")
for (i in 1:3) {
   z<-sigma*rnorm(n)
   y<-alpha+beta*x+z
   estimates<-lm(y~x)$coef
   yhat<-estimates[1]+estimates[2]*x
   plot(x,y,pch=19,xlim=c(0,11),ylim=c(0,25))
   lines(x,alpha+beta*x)
   lines(x,yhat,lty=2)
   }
dev.off()
#
# Plots to show how transformations of the data change the shape of the relationship
# between root-length and glyphosate concentration (Figures 7.6 and 7.7)
#
glyphosate<-read.table("../data/glyphosate.data",header=F)
names(glyphosate)<-c("concentration","water","rep","rootlength")
pdf(file="glyphosate_transformed.pdf",height=6,width=8)
par(mfrow=c(1,2))
plot(glyphosate$conc,glyphosate$rootlength,type="n",xlab="concentration",ylab="root-length",
   cex.axis=1.5,cex.lab=1.5)
take<-glyphosate$water==1
points(glyphosate$conc[take],glyphosate$rootlength[take],pch=19,cex=1.5)
take<-glyphosate$water==2
points(glyphosate$conc[take],glyphosate$rootlength[take],pch=1,cex=1.5)
plot(log(1+glyphosate$conc),glyphosate$rootlength,type="n",xlab="log(1+concentration)",ylab="root-length",
   cex.axis=1.5,cex.lab=1.5)
take<-glyphosate$water==1
points(log(1+glyphosate$conc[take]),glyphosate$rootlength[take],pch=19,cex=1.5)
take<-glyphosate$water==2
points(log(1+glyphosate$conc[take]),glyphosate$rootlength[take],pch=1,cex=1.5)
dev.off()
glyphosate<-read.table("../data/glyphosate.data",header=F)
names(glyphosate)<-c("concentration","water","rep","rootlength")
pdf(file="glyphosate_transformed2.pdf",height=6,width=8)
par(mfrow=c(1,2))
plot(glyphosate$conc,glyphosate$rootlength,type="n",xlab="concentration",ylab="root-length",
   cex.axis=1.5,cex.lab=1.5)
take<-glyphosate$water==1
points(glyphosate$conc[take],glyphosate$rootlength[take],pch=19,cex=1.5)
take<-glyphosate$water==2
points(glyphosate$conc[take],glyphosate$rootlength[take],pch=1,cex=1.5)
plot(log(1+glyphosate$conc),log(glyphosate$rootlength),type="n",xlab="log(1+concentration)",ylab="log(root-length)",
   cex.axis=1.5,cex.lab=1.5)
take<-glyphosate$water==1
points(log(1+glyphosate$conc[take]),log(glyphosate$rootlength[take]),pch=19,cex=1.5)
take<-glyphosate$water==2
points(log(1+glyphosate$conc[take]),log(glyphosate$rootlength[take]),pch=1,cex=1.5)
dev.off()
#
# Crossover analysis of the asthma trial data using a linear model, with d
# denoting the difference between PEF values for the two drugs used on the 
# same child, and v coded as 1 or -1 according to the order in which the two
# drugs were given (Section 7.6.4)
#
d<-c(40,50,70,20,40,30,-35,15,90,30,30,80,130)
v<-c(1,1,1,1,1,1,1,-1,-1,-1,-1,-1,-1)
fit<-lm(d~v)
summary(fit)
#
# Analysing a four-treatment experiment (log-transformed expression levels for a single
# arabadopsis thaliana gene) using a linear model, with treatments initially considered
# as a single factor on four levels (Section 7.6.5)
#
 y<-c(6.469,6.575,7.080,7.714,8.429,6.998,6.357,6.278,5.906,6.874,7.870,3.872)
treatment<-as.factor(c(3,3,3,4,4,4,1,1,1,2,2,2))
fit1<-lm(log2(y)~-1+treatment)
summary(fit1)
fit2<-lm(log2(y)~treatment)
summary(fit2) 
#
# Analysis of the same data recognising the factorial structure of the four treatments,
# namely two factors (strain and challenge), each on two levels (Section 7.6.5)
#
strain<-c(1,1,1,1,1,1,0,0,0,0,0,0)
challenge<-c(0,0,0,1,1,1,0,0,0,1,1,1)
fit3<-lm(log2(y)~strain+challenge+strain:challenge)
summary(fit3)
#
# Fitting a linear model to the transformed glyphosate data (Section 7.6.8)
#
glyphosate<-read.table("../data/glyphosate.data",header=F)
names(glyphosate)<-c("concentration","water","rep","rootlength")
y<-log(glyphosate$rootlength)
x<-log(1+glyphosate$concentration)
water<-as.factor(glyphosate$water)
fit.glyphosate<-lm(y~water+x)
summary(fit.glyphosate)
n<-54
sigmasquared<-sum(fit.glyphosate$resid^2)/n
L0<-(-n/2)*log(sigmasquared)
fit2.glyphosate<-lm(y~water+x+water:x)
summary(fit2.glyphosate)
sigmasquared<-sum(fit2.glyphosate$resid^2)/n
L1<-(-n/2)*log(sigmasquared)
#
# The preferred model fits separate mean response profiles for root-length as a function of
# glyphosate concentration, according to whether plants are grown in distilled or tap water
# (Figure 7.8)
#
x<-0.01*(0:350)
ymat<-NULL
#
# Draw four fitted curves, corresponding to each of two parameters set at the lower and upper
# ends of their respective 95% confidence intervals
#
for (alpha in c(4.6193,4.8749)) {
   for (beta in c(-1.1046, -0.7866)) {
      ymat<-rbind(ymat,exp(alpha+beta*log(1+x)))
   }
}
pdf(file="fitted_untransformed.pdf",height=6,width=8)
par(mfrow=c(1,1))
plot(glyphosate$conc,glyphosate$rootlength,type="n",xlab="concentration",ylab="root-length",
   cex.axis=1.5,cex.lab=1.5)
take<-glyphosate$water==1
points(glyphosate$conc[take],glyphosate$rootlength[take],pch=19,cex=1.5)
for (i in 1:2) {
   lines(x,ymat[i,],lwd=2)
   }
for (i in 3:4) {
   lines(x,ymat[i,],lwd=2,lty=2)
   }
dev.off()

#
# Anscombe's quartet: four synthetic data-sets that give identical linear model fits,
# but contrasting patterns of residuals (Figures 7.9, 7.10)
#
data<-read.table("../data/anscombe_data.txt",header=T)
y<-data$y1
x<-data$x
fit<-lm(y~x)
summary(fit)
#
#Call:
#lm(formula = y ~ x)
#
#Residuals:
#     Min       1Q   Median       3Q      Max 
#-1.92127 -0.45577 -0.04136  0.70941  1.83882 
#
#Coefficients:
#            Estimate Std. Error t value Pr(>|t|)   
#(Intercept)   3.0001     1.1247   2.667  0.02573 * 
#x             0.5001     0.1179   4.241  0.00217 **
#---
#Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 
#
#Residual standard error: 1.237 on 9 degrees of freedom
#Multiple R-squared: 0.6665,     Adjusted R-squared: 0.6295 
#F-statistic: 17.99 on 1 and 9 DF,  p-value: 0.002170 
#
xfit<-c(0,20)
yfit<-3.0001+0.5001*xfit
x4<-data$x4
y1<-y
y2<-data$y2
y3<-data$y3
y4<-data$y4
xfit<-c(0,20)
yfit<-3.0001+0.5001*xfit
x4<-data$x4
y1<-y
y2<-data$y2
y3<-data$y3
y4<-data$y4
pdf(file="anscombe.pdf",height=6,width=8)
par(mfrow=c(2,2))
plot(xfit,yfit,type="l",xlab="x",ylab="y",xlim=c(0,20),ylim=c(0,14),cex.lab=1.5,cex.axis=1.5)
points(x,y1,pch=19,cex=1.5)
plot(xfit,yfit,type="l",xlab="x",ylab="y",xlim=c(0,20),ylim=c(0,14),cex.lab=1.5,cex.axis=1.5)
points(x,y2,pch=19,cex=1.5)
plot(xfit,yfit,type="l",xlab="x",ylab="y",xlim=c(0,20),ylim=c(0,14),cex.lab=1.5,cex.axis=1.5)
points(x,y3,pch=19,cex=1.5)
plot(xfit,yfit,type="l",xlab="x",ylab="y",xlim=c(0,20),ylim=c(0,14),cex.lab=1.5,cex.axis=1.5)
points(x4,y4,pch=19,cex=1.5)
dev.off()
f1<-3.0001+0.5001*x
f2<-f1
f3<-f1
f4<-3.0001+0.5001*x4
r1<-y1-f1
r2<-y2-f2
r3<-y3-f3
r4<-y4-f4
pdf(file="anscombe_residuals.pdf",height=6,width=8)
par(mfrow=c(2,2))
plot(f1,r1,pch=19,cex=1.5,xlab="fitted values",ylab="residuals",cex.lab=1.5,cex.axis=1.5,xlim=c(4,14),ylim=c(-3.5,3.5))
lines(c(4,14),c(0,0),lty=2)
plot(f2,r2,pch=19,cex=1.5,xlab="fitted values",ylab="residuals",cex.lab=1.5,cex.axis=1.5,xlim=c(4,14),ylim=c(-3.5,3.5))
lines(c(4,14),c(0,0),lty=2)
plot(f3,r3,pch=19,cex=1.5,xlab="fitted values",ylab="residuals",cex.lab=1.5,cex.axis=1.5,xlim=c(4,14),ylim=c(-3.5,3.5))
lines(c(4,14),c(0,0),lty=2)
plot(f4,r4,pch=19,cex=1.5,xlab="fitted values",ylab="residuals",cex.lab=1.5,cex.axis=1.5,xlim=c(4,14),ylim=c(-3.5,3.5))
lines(c(4,14),c(0,0),lty=2)
dev.off()
#
# A plot of residuals against time, for a one-year series of daily maximum temperatures fitted
# by a simple harmonic regression model (Figure 7.11)
#
maxtemp<-read.table("../data/maxtemp.data",header=F)
names(maxtemp)<-c("year","month","day","temperature")
n<-dim(maxtemp)[1]
time<-1:n
temperature<-maxtemp$temperature
x1<-sin(2*pi*time/n)
x2<-cos(2*pi*time/n)
fit<-lm(temperature~x1+x2)
pdf(file="maxtemp_residuals.pdf",height=6,width=8)
plot(time,fit$resid,type="l",xlab="time (days)",ylim=c(-8,8),cex.axis=1.5,
   cex.lab=1.7,ylab="residual")
lines(c(0,366),c(0,0),lty=2)
dev.off()
#
# A plot of residuals against fitted values for the data from the glyphosate experiment
# (Figure 7.12)
#
r<-fit2.glyphosate$residuals
f<-fit2.glyphosate$fitted.values
pdf(file="fitted_v_residuals.pdf",height=6,width=8)
plot(f,r,xlab="fitted values",ylab="residuals",type="n",cex.lab=1.5,cex.axis=1.5)
points(f[1:27],r[1:27],pch=19,cex=1.5)
points(f[28:54],r[28:54],pch=1,cex=1.5)
dev.off()
#
# Fitting a simple exponential growth model to world population data 
#
# Tabulate data and differences between population in successive five-year periods
# (Table 7.1)
#
pop<-read.table("../data/world_population.data",header=T)
y<-pop[,2]
d<-c(NA,y[2:13]-y[1:12])
r<-c(NA,y[2:13]/y[1:12])
round(cbind(pop,d,r),3)
#
# Plot log-population against year to show that exponential growth
# model is reasonable up to 1990 but not beyond (Figure 7.13)
#
pdf("world_population.pdf",paper="special",height=6,width=6)
plot(pop[,1],log(pop[,2]),pch=19,xlab="year",ylab="log(population)")
y<-log(pop[1:9,2])
x<-pop[1:9,1]
fit.pop<-lm(y~x)
lines(x,fit.pop$fitted.values,lwd=2)
dev.off()
#
# Plot population against year, and log-transformed population against year,
# to see if exponential growth is a reasonable assumption
#
ratio<-c(NA,pop[2:13,2]/pop[1:12,2])
difference<-c(NA,pop[2:13,2]-pop[1:12,2])
cbind(pop,difference,ratio)
year<-pop$year
population<-pop$population
pdf(file="world_population.pdf",height=6,width=8)
par(mfrow=c(1,2))
plot(year,population,pch=19,cex=1.5,cex.lab=1.5,cex.axis=1.5)
plot(year,log(population),pch=19,cex=1.5,cex.lab=1.5,cex.axis=1.5)
dev.off()
#
# Fit split-line model, with change in slope in 1990 (Figure 7.14)
#
pdf("world_population_fit.pdf",paper="special",height=6,width=6)
par(pty="s")
plot(pop[,1],log(pop[,2]),pch=19,xlab="year",ylab="log(population)")
y<-log(pop[,2])
x<-pop[,1]-1950
u<-c(rep(0,9),pop[10:13,1]-pop[9,1])
fit3<-lm(y~x+u)
lines(x,fit3$fitted.values,lwd=2)
dev.off()
#
# Graphical comparison between exponential and logistic growth (Figure 7.15)
#
t<-0.1*(0:100)
alpha<-0
beta<-1
gamma<-100
y1<-exp(alpha+beta*t); y1<-y1/y1[1]
y2<-gamma*exp(alpha + beta*t)/(gamma + exp(alpha + beta*t));y2<-y2/y2[1]
pdf(file="exponential_and_logistic.pdf",height=6,width=8)
plot(t,y2,type="l",lwd=2,cex.lab=1.5,cex.axis=1.5,xlab="t",ylab="mu(t)")
lines(t,y1,lty=2,lwd=2)
dev.off()
#
# Illustration of the logistic model for binary data. Each curve shows how
# the modelled probability of a positive response changes with the value of
# a single explanatory variable (Figure 7.16)
# 
x<-0.01*((-500):500)
y1<-exp(x)/(1+exp(x))
y2<-exp(1.0+x)/(1+exp(1.0+x))
y3<-exp(2*x)/(1+exp(2*x))
pdf(file="logit_inverse.pdf",height=6,width=8)
plot(x,y1,type="l",cex.lab=1.5,cex.axis=1.5,lwd=2,ylab="mu(x)")
lines(x,y2,lty=2,lwd=2)
lines(x,y3,lty=3,lwd=2)
dev.off()
#
# Graphical comparison between linear and quartic logistic models. Note
# how the quartic model produces an asymmetry in the shape of the curve
# (Figure 7.17)
#
x<-0.01*(0:1000)
y1<-exp(-5+2*x)/(1+exp(-5+2*x))
y2<-exp(-5+0.01*x*x*x*x)/(1+exp(-5+0.01*x*x*x*x))
pdf(file="logistic_quartic.pdf",height=6,width=8)
plot(x,y1,type="l",cex.lab=1.5,cex.axis=1.5,lwd=2,ylab="mu(x)")
lines(x,y2,lty=2,lwd=2)
dev.off()
#
# Poisson probability distributions with means 2, 4 and 8 (Figure 7.18)
#
x<-0:20
y2<-dpois(x,2)
y4<-dpois(x,4)
y8<-dpois(x,8)
pdf(file="poisson.pdf",paper="special",width=6,height=5)
plot(x,y2,type="l",xlab="x",ylab="p(x)",lwd=2)
lines(x,y4,lty=2,lwd=2)
lines(x,y8,lty=3,lwd=2)
dev.off()