# # This file contains R commands that reproduce the analyses # presented in Chapter 3. # # Create synthetic blood-pressure and BMI data. These data can also be # accessed directly from the web-page: # http://www.lancs.ac.uk/staff/diggle/intro-stats-book/datasets/ # set.seed(418778823) # # BMI values are simulated from a Normal distribution with mean 28 and # standard deviation 2.5 # BMI<-round(sort(28+2.5*rnorm(30)),1) # # logical variables F (false) and T (true) used to define which BMI values # are in group A # assign.to.A<-sample(rep(c(F,T),15),30,replace=F) # # the ! symbol is the logical "not" operator (if you are not on treatment A # you must be on treatment B) # assign.to.B<-!assign.to.A BMI.A<-BMI[assign.to.A] BMI.B<-BMI[assign.to.B] # # BP values are linearly related to BMI values, plus Normally distributed residual # BP.A<-round(120+2*BMI.A+2.0*rnorm(15),1) BP.B<-round(80+3.2*BMI.B+2.0*rnorm(15),1) # # Assign data to a four-column array, and display the first five rows # data<-cbind(BP.A,BMI.A,BP.B,BMI.B) data[1:5,] # # Draw dot-plot of blood-pressure for the first 10 people assigned to A, # and the first 10 people assigned to B (Figure 3.1) # pdf("BP_dotplot.pdf",paper="special",height=4,width=6) plot(BP.A[1:10],rep(0.2,10),pch=19,xlab="blood pressure",ylab=" ",yaxt="n",ylim=c(0,1),bty="n", cex=1,cex.axis=1.5,cex.lab=1.5,xlim=c(150,180)) points(BP.B[1:10],rep(0.3,10),pch=1) dev.off() # # Draw scatterplot of BMI against BP, using different plotting symbols and colours # to differentiate between treatments A and B (Figure 3.2) # pdf("BP_and_BMI_some.pdf",paper="special",height=6,width=8) plot(c(BMI.A,BMI.B),c(BP.A,BP.B),type="n",xlab="BMI",ylab="BP",cex.lab=1.5,cex.axis=1.5, xlim=c(22,30),ylim=c(150,180)) points(BMI.A[1:10],BP.A[1:10],pch=19,cex=1.5) points(BMI.B[1:10],BP.B[1:10],pch=1,cex=1.5) dev.off() # # Draw scatterplot of BMI against BP, using different plotting symbols and colours # to differentiate between treatments (A and B) and people who (hypothetically) # did or did not agree to take part in the study (Figure 3.3) # pdf("BP_and_BMI_all.pdf",paper="special",height=6,width=8) plot(c(BMI.A,BMI.B),c(BP.A,BP.B),type="n",xlab="BMI",ylab="BP",cex.lab=1.5,cex.axis=1.5) points(BMI.A[1:10],BP.A[1:10],pch=19,cex=1.5) points(BMI.B[1:10],BP.B[1:10],pch=1,cex=1.5) points(BMI.A[11:15],BP.A[11:15],pch=2,cex=1.5) points(BMI.B[11:15],BP.B[11:15],pch=5,cex=1.5) dev.off() # # Simulate 1000 independent tosses of a fair coin (changing the value of the argument of # the set.seed() function will generate a different simulation) # set.seed(49091) x<-runif(1000)<0.5 # # Calculate and plot proportion of heads against number of tosses (Figure 3.4) # cp<-cumsum(x)/(1:1000) plot(1:1000,cp,type="l",xlim=c(0,1000),ylim=c(0.4,1),xlab="number of tosses",ylab="proportion of heads", cex.lab=1.5,cex.axis=1.5) lines(c(0,1000),rep(0.5,2),lty=2) # # An example of a probability density function (Figure 3.5) # x<-0.01*(0:100) y<-dbeta(x,2.0,4.0) plot(x,y,type="l",xlab="outcome",ylab="pdf",lwd=2,cex.lab=1.5,cex.axis=1.5) # # A user-defined function to calculate the log-likelihood for a "Bernoulli sequence," # i.e. a sequence of independent tosses of a biased coin # logL<-function(rho,x) { # # Arguments # rho: vector of probabilities of positive outcome # x: binary sequence, x[i]=1/0 if ith outcome is positive/negative, respectively # Result # log-likelihood for each value of rho # npos<-sum(x==1) nneg<-sum(x==0) if ((npos+nneg)!=length(x)) print("invalid argument x") if (min(rho)<0) print("invalid argument rho") if (max(rho)>1) print("invalid argument rho") npos*log(rho)+nneg*log(1-rho) } # # Use the above function to calculate and plot the likelihood and log-likelihood functions # for the sequence used as an illustration in Section 3.4 of the book, coding # a positive outcome as 1 and a negative outcome as 0 # x<-c(1,0,0,1,1,0,1,1,1,0) rho<-0.01*(1:99) y<-logL(rho,x) par(mfrow=c(1,2)) plot(rho,exp(y),type="l",lwd=2,xlab="prevalence",ylab="likelihood",cex.lab=1.5,cex.axis=1.5) lines(c(0.6,0.6),c(0,exp(y[60])),lty=2,lwd=1) plot(rho,y,type="l",lwd=2,xlab="prevalence",ylab="log-likelihood",cex.lab=1.5,cex.axis=1.5) lines(c(0.6,0.6),c(y[1],y[60]),lty=2,lwd=1) dev.off() # # Graphical construction of a likelihood-based confidence interval for rho (Figure 3.6) # plot(rho,y,type="l",lwd=2,xlab="prevalence",ylab="log-likelihood",cex.lab=1.5,cex.axis=1.5,xlim=c(0,1), ylim=c(-12,-6)) # # find value of rho at which log-likelihood takes its maximum value # place<-order(y,decreasing=T) place.max<-place[1] Lmax<-y[place.max] Lcrit<-Lmax-1.92 # # Add horizontal lines at heights Lmax and Lcrit # lines(c(0,1),c(Lmax,Lmax),lty=2,lwd=1) lines(c(0,1),c(Lmax-1.92,Lmax-1.92),lty=2,lwd=1) # # Lower and upper end-points of 95% confidence interval are the # values of rho at which L(rho)=Lcrit #