# # This file contains R commands that reproduce the analyses # presented in Chapter 5 # # Read Mercer and Hall data from named file and display plot-yields # as a grey-scale image (Figure 5.2) # mercerhall<-read.table("../data/mercerhall.data",header=T) names(mercerhall)<-c("col","row","yield") yields<-matrix(mercerhall$yield,20,25,T) pdf(file="mercerhall.pdf",height=6,width=8) image(y=unique(mercerhall$row)[20:1],x=unique(mercerhall$col), z=t(yields),xlab=" ",ylab=" ",col=grey((0:63)/63)) dev.off() # # divide plots equally between two fictitious treatments (varieties of wheat) # corrresponding to left-hand and right-hand halves of the field and calculate # summary statistics # yields1<-c(yields[,1:12]) yields1<-c(yields1,yields[1:10,13]) m1<-mean(yields1) se1<-sqrt(var(yields1)/250) yields2<-c(yields[,14:25]) yields2<-c(yields2,yields[11:20,13]) m2<-mean(yields2) se2<-sqrt(var(yields2)/250) round(c(m1,se1,m2,se2),3) var.pooled<-(var(yields1)+var(yields2))/2 se.pooled<-sqrt(2*var.pooled/250) # # 95% confidence interval for difference between mean yields of the two # varieties (the method for constructing confidence intervals is explained # in Chapter 6. Simple comparative experiments # CI.diff<-(m1-m2)+c(-1,1)*qt(0.975,498)*se.pooled # # Display results as back-to-back histograms (Figure 5.4) # # Note that we have written our own function (back.to.back) to produce this plot. You can also # find an equivalent function (histbackback) in the CRAN package Hmisc # pdf(file="mercerhall_yields.pdf",height=6,width=8) breaks<-2.4+0.2*(0:15) back.to.back(yields1,yields2,breaks=breaks,ylab="yield",cex.lab=1.5,cex.axis=1.5) text(x=c(-30,30),y=c(2.5,2.5),labels=c("variety 1","variety 2"),cex=1.5) dev.off() # # Repeat, but now allocating plots to varieties at random # set.seed(1890473) yields<-mercerhall$yield index1<-sample(1:500,size=250,replace=F) index2<-rep(F,500) for (i in 1:500) { index2[i]<-(sum(i==index1)==0) } index2<-(1:500)[index2] yields1<-yields[index1] m1<-mean(yields1) se1<-sqrt(var(yields1)/250) yields2<-yields[index2] m2<-mean(yields2) se2<-sqrt(var(yields2)/250) round(c(m1,se1,m2,se2),3) var.pooled<-(var(yields1)+var(yields2))/2 se.pooled<-sqrt(2*var.pooled/250) CI.diff<-(m1-m2)+c(-1,1)*qt(0.975,498)*se.pooled pdf(file="mercerhall_yields_randomised.pdf",height=6,width=8) breaks<-2.4+0.2*(0:15) back.to.back(yields1,yields2,breaks=breaks,ylab="yield",cex.lab=1.5,cex.axis=1.5) text(x=c(-30,30),y=c(2.5,2.5),labels=c("variety 1","variety 2"),cex=1.5) dev.off() # # Monte Carlo permutation tests to establish whether there is a significant difference # between the yields of the two varieties (Figure 5.6), noting that there is no genuine # difference, as the original plot-yields were allocated at random between two # fictitious varieties) # set.seed(1890473) yields<-mercerhall$yield index1<-sample(1:500,size=250,replace=F) index2<-rep(F,500) for (i in 1:500) { index2[i]<-(sum(i==index1)==0) } index2<-(1:500)[index2] diff<-mean(yields[index2])-mean(yields[index1]) ryields<-yields rdiff<-rep(0,999) for (i in 1:999) { ryields<-sample(ryields,500,T) rdiff[i]<-mean(ryields[index1])-mean(ryields[index2]) } # # back-to-back histograms of two sets of yields (Figure 5.4) # back.to.back<-function(y1,y2,breaks=min(c(y1,y2))+(0:10)*(max(c(y1,y2))-min(c(y1,y2)))/10, xlab="frequency",ylab="y",cex.lab=1,cex.axis=1) { h1<-hist(y1,breaks=breaks,plot=F) h2<-hist(y2,breaks=breaks,plot=F) xlim<-c(min(-h1$counts)-1,max(h2$counts)+1) ylim<-range(breaks) nbin<-length(breaks)-1 plot(xlim,ylim,xlim=xlim,ylim=ylim,type="n",xlab="frequency",ylab=ylab,cex.lab=cex.lab,cex.axis=cex.axis) for (i in 1:nbin) { lines(c(0,rep(-h1$count[i],2),0),c(rep(breaks[i],2),rep(breaks[i+1],2))) lines(c(0,rep(h2$count[i],2),0),c(rep(breaks[i],2),rep(breaks[i+1],2))) } lines(c(0,0),ylim,lwd=2) } pdf(file="mercerhall_permutations.pdf",height=6,width=8) par(mfrow=c(1,2),pty="s") hist(rdiff,xlab="difference",cex.lab=1.5,cex.axis=1.5) points(diff,0,pch=19,cex=1.5) ryields<-yields ryields[index2]<-ryields[index2]+0.1 diff2<-mean(ryields[index2])-mean(ryields[index1]) rdiff<-rep(0,999) for (i in 1:999) { ryields<-sample(ryields,500,T) rdiff[i]<-mean(ryields[index2])-mean(ryields[index1]) } hist(rdiff,xlab="difference",cex.lab=1.5,cex.axis=1.5,xlim=range(c(rdiff,diff2))) points(diff2,0,pch=19,cex=1.5) dev.off() # # simulation of completely randomised design on 20 treatments (Section 5.3) # set.seed(35671) yields<-mercerhall$yield constants<-sample(1:5,20,T) treatments<-rep(1:20,25) treatments<-sample(treatments,500,F) yields<-yields+constants[treatments] means<-rep(0,20) vars<-rep(0,20) for (i in 1:20) { yieldsi<-yields[treatments==i] means[i]<-mean(yieldsi) vars[i]<-var(yieldsi) } results.cr<-matrix(0,10,4) row<-0 for (i in 1:4) { for (j in (i+1):5) { row<-row+1 results.cr[row,1]<-i results.cr[row,2]<-j results.cr[row,3]<-means[i]-means[j] results.cr[row,4]<-sqrt(((vars[i]+vars[j])/2)*(2/25)) } } # # simulation of complete randomised block design on 20 treatments (Section 5.3) # yields<-matrix(mercerhall$yield,20,25,T) treatments<-matrix(0,20,25) for (j in 1:25) { treatments[,j]<-sample(1:20,20,F) yields[,j]<-yields[,j]+constants[treatments[,j]] } results.crb<-matrix(0,10,4) row<-0 for (i in 1:4) { for (j in (i+1):5) { row<-row+1 results.crb[row,1]<-i results.crb[row,2]<-j diff<-rep(0,25) for (col in 1:25) { diff[col]<-yields[treatments[,col]==i,col]-yields[treatments[,col]==j,col] } results.crb[row,3]<-mean(diff) results.crb[row,4]<-sqrt(var(diff)/25) } } round(results.cr,3) round(results.crb,3) round(apply(results.cr,2,mean),3) round(apply(results.crb,2,mean),3) # # factorial experiments: data extracted from the microarray experiment # discussed in Chapter 4. Exploratory data analysis # y<-c(5.208,6.469,12.924,7.854, 6.649,6.575,15.234,10.226, 7.122,7.080,18.027,9.111, 5.031,7.714,16.699,8.308, 4.755,8.429,15.186,8.688, 6.053,6.998,15.709,12.249, 6.410,6.357,11.772,8.741, 5.482,6.278,16.407,8.395, 5.180,5.906,24.235,8.913, 5.211,6.874,17.471,8.645, 6.710,7.870,14.657,7.255, 5.564,3.872,20.247,13.179) y<-matrix(y,12,4,T) y2<-log2(y[,2]) y2<-matrix(y2,4,3,T) y2means<-apply(y2,1,mean) y2means<-matrix(y2means,2,2,T) y2means<-round(y2means,3) y2means<-cbind(y2means,round(apply(y2means,1,mean),3)) y2means<-rbind(y2means,round(apply(y2means,2,mean),3)) y2means