#
# This file contains R commands that reproduce the analyses 
# presented in Chapter 5
#
# Read Mercer and Hall data from named file and display plot-yields
# as a grey-scale image (Figure 5.2)
#
mercerhall<-read.table("../data/mercerhall.data",header=T)
names(mercerhall)<-c("col","row","yield")
yields<-matrix(mercerhall$yield,20,25,T)
pdf(file="mercerhall.pdf",height=6,width=8)
image(y=unique(mercerhall$row)[20:1],x=unique(mercerhall$col),
   z=t(yields),xlab=" ",ylab=" ",col=grey((0:63)/63))
dev.off()
#
# divide plots equally between two fictitious treatments (varieties of wheat)
# corrresponding to left-hand and right-hand halves of the field and calculate 
# summary statistics
#
yields1<-c(yields[,1:12])
yields1<-c(yields1,yields[1:10,13])
m1<-mean(yields1)
se1<-sqrt(var(yields1)/250)
yields2<-c(yields[,14:25])
yields2<-c(yields2,yields[11:20,13])
m2<-mean(yields2)
se2<-sqrt(var(yields2)/250)
round(c(m1,se1,m2,se2),3)
var.pooled<-(var(yields1)+var(yields2))/2
se.pooled<-sqrt(2*var.pooled/250)
#
# 95% confidence interval for difference between mean yields of the two
# varieties (the method for constructing confidence intervals is explained
# in Chapter 6. Simple comparative experiments
#
CI.diff<-(m1-m2)+c(-1,1)*qt(0.975,498)*se.pooled
#
# Display results as back-to-back histograms (Figure 5.4)
#
# Note that we have written our own function (back.to.back) to produce this plot. You can also
# find an equivalent function (histbackback) in the CRAN package Hmisc
#
pdf(file="mercerhall_yields.pdf",height=6,width=8)
breaks<-2.4+0.2*(0:15)
back.to.back(yields1,yields2,breaks=breaks,ylab="yield",cex.lab=1.5,cex.axis=1.5)
text(x=c(-30,30),y=c(2.5,2.5),labels=c("variety 1","variety 2"),cex=1.5)
dev.off()
#
# Repeat, but now allocating plots to varieties at random
#
set.seed(1890473)
yields<-mercerhall$yield
index1<-sample(1:500,size=250,replace=F)
index2<-rep(F,500)
for (i in 1:500) {
   index2[i]<-(sum(i==index1)==0)
   }
index2<-(1:500)[index2]
yields1<-yields[index1]
m1<-mean(yields1)
se1<-sqrt(var(yields1)/250)
yields2<-yields[index2]
m2<-mean(yields2)
se2<-sqrt(var(yields2)/250)
round(c(m1,se1,m2,se2),3)
var.pooled<-(var(yields1)+var(yields2))/2
se.pooled<-sqrt(2*var.pooled/250)
CI.diff<-(m1-m2)+c(-1,1)*qt(0.975,498)*se.pooled
pdf(file="mercerhall_yields_randomised.pdf",height=6,width=8)
breaks<-2.4+0.2*(0:15)
back.to.back(yields1,yields2,breaks=breaks,ylab="yield",cex.lab=1.5,cex.axis=1.5)
text(x=c(-30,30),y=c(2.5,2.5),labels=c("variety 1","variety 2"),cex=1.5)
dev.off()
#
# Monte Carlo permutation tests to establish whether there is a significant difference
# between the yields of the two varieties (Figure 5.6), noting that there is no genuine 
# difference, as the original plot-yields were allocated at random between two 
# fictitious varieties)
#
set.seed(1890473)
yields<-mercerhall$yield
index1<-sample(1:500,size=250,replace=F)
index2<-rep(F,500)
for (i in 1:500) {
   index2[i]<-(sum(i==index1)==0)
   }
index2<-(1:500)[index2]
diff<-mean(yields[index2])-mean(yields[index1])
ryields<-yields
rdiff<-rep(0,999)
for (i in 1:999) {
   ryields<-sample(ryields,500,T)
   rdiff[i]<-mean(ryields[index1])-mean(ryields[index2])
   }
#
# back-to-back histograms of two sets of yields (Figure 5.4)
#
back.to.back<-function(y1,y2,breaks=min(c(y1,y2))+(0:10)*(max(c(y1,y2))-min(c(y1,y2)))/10,
xlab="frequency",ylab="y",cex.lab=1,cex.axis=1) {
   h1<-hist(y1,breaks=breaks,plot=F)
   h2<-hist(y2,breaks=breaks,plot=F)
   xlim<-c(min(-h1$counts)-1,max(h2$counts)+1)
   ylim<-range(breaks)
   nbin<-length(breaks)-1
   plot(xlim,ylim,xlim=xlim,ylim=ylim,type="n",xlab="frequency",ylab=ylab,cex.lab=cex.lab,cex.axis=cex.axis)
   for (i in 1:nbin) {
      lines(c(0,rep(-h1$count[i],2),0),c(rep(breaks[i],2),rep(breaks[i+1],2)))
      lines(c(0,rep(h2$count[i],2),0),c(rep(breaks[i],2),rep(breaks[i+1],2)))
      }
   lines(c(0,0),ylim,lwd=2)
   }
pdf(file="mercerhall_permutations.pdf",height=6,width=8)
par(mfrow=c(1,2),pty="s")
hist(rdiff,xlab="difference",cex.lab=1.5,cex.axis=1.5)
points(diff,0,pch=19,cex=1.5)
ryields<-yields
ryields[index2]<-ryields[index2]+0.1
diff2<-mean(ryields[index2])-mean(ryields[index1])
rdiff<-rep(0,999)
for (i in 1:999) {
   ryields<-sample(ryields,500,T)
   rdiff[i]<-mean(ryields[index2])-mean(ryields[index1])
   }
hist(rdiff,xlab="difference",cex.lab=1.5,cex.axis=1.5,xlim=range(c(rdiff,diff2)))
points(diff2,0,pch=19,cex=1.5)
dev.off()
#
# simulation of completely randomised design on 20 treatments (Section 5.3)
#
set.seed(35671)
yields<-mercerhall$yield
constants<-sample(1:5,20,T)
treatments<-rep(1:20,25)
treatments<-sample(treatments,500,F)
yields<-yields+constants[treatments]
means<-rep(0,20)
vars<-rep(0,20)
for (i in 1:20) {
   yieldsi<-yields[treatments==i]
   means[i]<-mean(yieldsi)
   vars[i]<-var(yieldsi)
   }
results.cr<-matrix(0,10,4)
row<-0
for (i in 1:4) {
   for (j in (i+1):5) {
      row<-row+1
      results.cr[row,1]<-i
      results.cr[row,2]<-j
      results.cr[row,3]<-means[i]-means[j]
      results.cr[row,4]<-sqrt(((vars[i]+vars[j])/2)*(2/25))
      }
   }
#
# simulation of complete randomised block design on 20 treatments (Section 5.3)
#
yields<-matrix(mercerhall$yield,20,25,T)
treatments<-matrix(0,20,25)
for (j in 1:25) {
   treatments[,j]<-sample(1:20,20,F)
   yields[,j]<-yields[,j]+constants[treatments[,j]]
   }
results.crb<-matrix(0,10,4)
row<-0
for (i in 1:4) {
   for (j in (i+1):5) {
      row<-row+1
      results.crb[row,1]<-i
      results.crb[row,2]<-j
      diff<-rep(0,25)
      for (col in 1:25) {
         diff[col]<-yields[treatments[,col]==i,col]-yields[treatments[,col]==j,col]
         }
      results.crb[row,3]<-mean(diff)
      results.crb[row,4]<-sqrt(var(diff)/25)
      }
   }
round(results.cr,3)
round(results.crb,3)
round(apply(results.cr,2,mean),3)
round(apply(results.crb,2,mean),3)
#
# factorial experiments: data extracted from the microarray experiment
# discussed in Chapter 4. Exploratory data analysis
#
y<-c(5.208,6.469,12.924,7.854,
6.649,6.575,15.234,10.226,
7.122,7.080,18.027,9.111,
5.031,7.714,16.699,8.308,
4.755,8.429,15.186,8.688,
6.053,6.998,15.709,12.249,
6.410,6.357,11.772,8.741,
5.482,6.278,16.407,8.395,
5.180,5.906,24.235,8.913,
5.211,6.874,17.471,8.645,
6.710,7.870,14.657,7.255,
5.564,3.872,20.247,13.179)
y<-matrix(y,12,4,T)

y2<-log2(y[,2])
y2<-matrix(y2,4,3,T)
y2means<-apply(y2,1,mean)
y2means<-matrix(y2means,2,2,T)
y2means<-round(y2means,3)
y2means<-cbind(y2means,round(apply(y2means,1,mean),3))
y2means<-rbind(y2means,round(apply(y2means,2,mean),3))
y2means