#
# This file contains R commands that reproduce the analyses 
# presented in Chapter 3.
#
# Create synthetic blood-pressure and BMI data. These data can also be 
# accessed directly from the web-page:
#       http://www.lancs.ac.uk/staff/diggle/intro-stats-book/datasets/
# 
set.seed(418778823)
#
# BMI values are simulated from a Normal distribution with mean 28 and 
# standard deviation 2.5
#
BMI<-round(sort(28+2.5*rnorm(30)),1)
#
# logical variables F (false) and T (true) used to define which BMI values 
# are in group A
#
assign.to.A<-sample(rep(c(F,T),15),30,replace=F)
#
# the ! symbol is the logical "not" operator (if you are not on treatment A 
# you must be on treatment B)
#
assign.to.B<-!assign.to.A
BMI.A<-BMI[assign.to.A]
BMI.B<-BMI[assign.to.B]
#
# BP values are linearly related to BMI values, plus Normally distributed residual
#
BP.A<-round(120+2*BMI.A+2.0*rnorm(15),1)
BP.B<-round(80+3.2*BMI.B+2.0*rnorm(15),1)
#
# Assign data to a four-column array, and display the first five rows
#
data<-cbind(BP.A,BMI.A,BP.B,BMI.B)
data[1:5,]
#
# Draw dot-plot of blood-pressure for the first 10 people assigned to A,
# and the first 10 people assigned to B (Figure 3.1)
#
pdf("BP_dotplot.pdf",paper="special",height=4,width=6)
plot(BP.A[1:10],rep(0.2,10),pch=19,xlab="blood pressure",ylab=" ",yaxt="n",ylim=c(0,1),bty="n",
   cex=1,cex.axis=1.5,cex.lab=1.5,xlim=c(150,180))
points(BP.B[1:10],rep(0.3,10),pch=1)
dev.off()
#
# Draw scatterplot of BMI against BP, using different plotting symbols and colours
# to differentiate between treatments A and B (Figure 3.2)
#
pdf("BP_and_BMI_some.pdf",paper="special",height=6,width=8)
plot(c(BMI.A,BMI.B),c(BP.A,BP.B),type="n",xlab="BMI",ylab="BP",cex.lab=1.5,cex.axis=1.5,
   xlim=c(22,30),ylim=c(150,180))
points(BMI.A[1:10],BP.A[1:10],pch=19,cex=1.5)
points(BMI.B[1:10],BP.B[1:10],pch=1,cex=1.5)
dev.off()

#
# Draw scatterplot of BMI against BP, using different plotting symbols and colours
# to differentiate between treatments (A and B) and people who (hypothetically)
# did or did not agree to take part in the study (Figure 3.3)
#
pdf("BP_and_BMI_all.pdf",paper="special",height=6,width=8)
plot(c(BMI.A,BMI.B),c(BP.A,BP.B),type="n",xlab="BMI",ylab="BP",cex.lab=1.5,cex.axis=1.5)
points(BMI.A[1:10],BP.A[1:10],pch=19,cex=1.5)
points(BMI.B[1:10],BP.B[1:10],pch=1,cex=1.5)
points(BMI.A[11:15],BP.A[11:15],pch=2,cex=1.5)
points(BMI.B[11:15],BP.B[11:15],pch=5,cex=1.5)
dev.off()
#
# Simulate 1000 independent tosses of a fair coin (changing the value of the argument of 
# the set.seed() function will generate a different simulation)
#
set.seed(49091)
x<-runif(1000)<0.5
#
# Calculate and plot proportion of heads against number of tosses (Figure 3.4)
#
cp<-cumsum(x)/(1:1000)
plot(1:1000,cp,type="l",xlim=c(0,1000),ylim=c(0.4,1),xlab="number of tosses",ylab="proportion of heads",
cex.lab=1.5,cex.axis=1.5)
lines(c(0,1000),rep(0.5,2),lty=2)
#
# An example of a probability density function (Figure 3.5)
#
x<-0.01*(0:100)
y<-dbeta(x,2.0,4.0)
plot(x,y,type="l",xlab="outcome",ylab="pdf",lwd=2,cex.lab=1.5,cex.axis=1.5)
#
# A user-defined function to calculate the log-likelihood for a "Bernoulli sequence,"
# i.e. a sequence of independent tosses of a biased coin
#
logL<-function(rho,x) {
#
# Arguments
#   rho: vector of probabilities of positive outcome
#     x: binary sequence, x[i]=1/0 if ith outcome is positive/negative, respectively
# Result
#   log-likelihood for each value of rho
#
   npos<-sum(x==1)
   nneg<-sum(x==0)
   if ((npos+nneg)!=length(x)) print("invalid argument x")
   if (min(rho)<0) print("invalid argument rho")
   if (max(rho)>1) print("invalid argument rho")
   npos*log(rho)+nneg*log(1-rho)
   }
#
# Use the above function to calculate and plot the likelihood and log-likelihood functions 
# for the sequence used as an illustration in Section 3.4 of the book, coding
# a positive outcome as 1 and a negative outcome as 0
# 
x<-c(1,0,0,1,1,0,1,1,1,0)
rho<-0.01*(1:99)
y<-logL(rho,x)
par(mfrow=c(1,2))
plot(rho,exp(y),type="l",lwd=2,xlab="prevalence",ylab="likelihood",cex.lab=1.5,cex.axis=1.5)
lines(c(0.6,0.6),c(0,exp(y[60])),lty=2,lwd=1)
plot(rho,y,type="l",lwd=2,xlab="prevalence",ylab="log-likelihood",cex.lab=1.5,cex.axis=1.5)
lines(c(0.6,0.6),c(y[1],y[60]),lty=2,lwd=1)
dev.off()
#
# Graphical construction of a likelihood-based confidence interval for rho (Figure 3.6)
#
plot(rho,y,type="l",lwd=2,xlab="prevalence",ylab="log-likelihood",cex.lab=1.5,cex.axis=1.5,xlim=c(0,1),
   ylim=c(-12,-6))
#
# find value of rho at which log-likelihood takes its maximum value
#
place<-order(y,decreasing=T)
place.max<-place[1]
Lmax<-y[place.max]
Lcrit<-Lmax-1.92
#
# Add horizontal lines at heights Lmax and Lcrit
#
lines(c(0,1),c(Lmax,Lmax),lty=2,lwd=1)
lines(c(0,1),c(Lmax-1.92,Lmax-1.92),lty=2,lwd=1)
#
# Lower and upper end-points of 95% confidence interval are the
# values of rho at which L(rho)=Lcrit
#