#
# This file contains R commands that reproduce the analyses 
# presented in Chapter 10
#
# load required packages
#
library(splancs)
library(fields)
library(geoR)
#
# commands to create Figure 10.4
#
x<-rep(1:20,20)
y<-c(matrix(x,20,20,T))
pdf(file="shifted_lattice.pdf",height=6,width=8)
par(pty="s")
plot(x,y,xlab=" ",ylab=" ",cex.axis=1.5,xlim=c(1,20),ylim=c(1,20),
   type="n")
take<-(x<=17)&(y<=15)
pointmap(cbind(x[take],y[take]),pch=1,add=T)
take<-(x>3)&(y>5)
pointmap(cbind(x[take],y[take]),pch=1,add=T)
rectangle<-cbind(c(0.5,17.5,17.5,0.5),c(0.5,0.5,15.5,15.5))
polymap(rectangle,add=T)
rectangle[,1]<-rectangle[,1]+3
rectangle[,2]<-rectangle[,2]+5
polymap(rectangle,add=T)
lines(c(1,4),c(1,6),lty=2,lwd=2)
lines(c(12,15),c(7,12),lty=2,lwd=2)
lines(c(10,13),c(14,19),lty=2,lwd=2)
dev.off()
#
# spatial correlogram of wheat uniformity trial data (Figure 10.5)
#
wheat<-read.table("../data/mercerhall.data",header=T)
yield<-wheat[,3]
mean.yield<-mean(yield)
var.yield<-var(yield)
yield<-matrix(yield,25,20,byrow=T)
z<-(yield-mean.yield)/sqrt(var.yield)
spatial.corr<-matrix(0,21,11)
for (i in 0:10) {
   for (j in 0:10) {
      x<-z[(1:(25-i)),(1:(20-j))]
      y<-z[((1+i):25),((1+j):20)]
      spatial.corr[10+i+1,j+1]<-sum(x*y)/((25-i)*(20-j))
      x<-z[((1+i):25),(1:(20-j))]
      y<-z[(1:(25-i)),((1+j):20)]
      spatial.corr[10-i+1,j+1]<-sum(x*y)/((25-i)*(20-j))
      }
   }
spatial.corr[11,1]<-NA
range(spatial.corr,na.rm=T) # 
pdf(file="spatial_correlation.pdf",height=6,width=8)
par(pty="s")
image.plot((-10):10,0:10,spatial.corr,xlab="r",ylab="s",xlim=c(-10.5,10.5),ylim=c(-0.5,10.5),
    zlim=c(-0.3,0.6),asp=1,legend.shrink=1,legend.width=2,cex.lab=1.5,
    col=gray((0:20)/20),axis.args=list(cex.axis=1.5))
#col=gray((0:20)/20),
dev.off()
#
# Lattice-based variogram of the wheat yield data (Figure 10.6)
#
variogram<-matrix(0,21,11)
for (i in 0:10)  {
   for (j in 0:10) {
      x<-z[(1:(25-i)),(1:(20-j))]
      y<-z[((1+i):25),((1+j):20)]
      variogram[10+i+1,j+1]<-0.5*sum((x-y)^2)/((25-i)*(20-j))
      x<-z[((1+i):25),(1:(20-j))]
      y<-z[(1:(25-i)),((1+j):20)]
      variogram[10-i+1,j+1]<-0.5*sum((x-y)^2)/((25-i)*(20-j))
      }
   }
variogram[11,1]<-NA
range(variogram,na.rm=T) #
pdf(file="variogram.pdf",height=6,width=8)
par(pty="s")
image.plot((-10):10,0:10,variogram,xlab="u",ylab="v",xlim=c(-10.5,10.5),ylim=c(-0.5,10.5),
    zlim=c(0.5,1.3),asp=1,legend.shrink=1,legend.width=2,cex.lab=1.5,
    col=gray((0:20)/20),axis.args=list(cex.axis=1.5))
lines(c(-10.5,10.5,10.5,-10.5,-10.5),c(-0.5,-0.5,10.5,10.5,-0.5))
dev.off()
#
# non-isotropic variogram cloud and binned version, simulated data (Figure 10.7)
#
set.seed(293745)
k<-4
n<-k*k
frac<-0.15
alpha<-0.5
beta<-0.5
x<-(c(matrix(1:k,k,k,T))/k)+frac*(runif(n)-0.5)-0.5/k
y<-(c(matrix(1:k,k,k,F))/k)+frac*(runif(n)-0.5)-0.5/k
z<-1+runif(n)+alpha*x+beta*y
dx<-NULL
dy<-NULL
u<-NULL
v<-NULL
ivec<-NULL
jvec<-NULL
for (i in 1:n) {
   for (j in 1:n) {
      if (i!=j) {
         dx<-c(dx,x[i]-x[j])
         dy<-c(dy,y[i]-y[j])
         u<-c(u,sqrt((x[i]-x[j])^2+(y[i]-y[j])^2))
         v<-c(v,0.5*(z[i]-z[j])^2)
         ivec<-c(ivec,i)
         jvec<-c(jvec,j)
         }
      }
   }
pdf(file="cloud_and_bin.pdf",height=6,width=8)
par(mfrow=c(1,2),pty="s")
plot(x,y,type="n",xlim=c(0,1),ylim=c(0,1),cex.axis=1.5,cex.lab=1.5)
scalez<-0.02
scalev<-0.08
symbols(x,y,circles=scalez*z,inches=F,add=T)
plot(dx,dy,type="n",xlim=c(-1,1),ylim=c(-1,1),cex.axis=1.5,cex.lab=1.5)
symbols(dx,dy,circles=scalev*v,inches=F,add=T)
binwidth<-1/k
k1<-k-1
for (i in (-k1:k)) {
   lines(rep((i-0.5)*binwidth,2),c(-1,1),lty=2)
   }
for (j in (-k1:k)) {
   lines(c(-1,1),rep((j-0.5)*binwidth,2),lty=2)
   }
dev.off()
#
# isotropic variogram cloud and binned version, simulated data (Figure 10.8)
#
pdf(file="isotropic_cloud_and_bin.pdf",height=6,width=8)
plot(u,v,pch=19,cex.axis=1.5,cex.lab=1.5,xlim=c(0,1.25))
for (i in 0:(k+1)) {
   lines(rep(i*binwidth,2),c(0,max(v)),lty=2)
   }
dev.off()
#
# directional variogram function
#
directional.variogram<-function(x,y,z,cellwidth,dmax) {
#
# Arguments:
#         x,y: spatial coordinates of data-values
#           z: data-values
#   cellwidth: width of binning interval in each coordinate direction (square bins)
#        dmax: maximum separatioon in each coordinate direction for calculation of variogram
#
# Result: list with elements
#   $cloud: three-dimensional variogram cloud
#     $bin: binned version of variogram cloud
#
   n<-length(x)
   zerocell<-1+ceiling((dmax-0.000001)/cellwidth)
   ncell<-2*zerocell-1
   vbin<-matrix(0,ncell,ncell)
   nbin<-matrix(0,ncell,ncell)
   dx<-NULL
   dy<-NULL
   u<-NULL
   v<-NULL
   ivec<-NULL
   jvec<-NULL
   for (i in 1:n) {
      for (j in 1:n) {
         if (i!=j) {
            dx<-c(dx,x[i]-x[j])
            dy<-c(dy,y[i]-y[j])
            u<-c(u,sqrt((x[i]-x[j])^2+(y[i]-y[j])^2))
            v<-c(v,0.5*(z[i]-z[j])^2)
            ivec<-c(ivec,i)
            jvec<-c(jvec,j)
            }
         }
      }
   N<-length(dx)
   for (i in 1:N) {
      dxi<-round(dx[i]/cellwidth)
      icellx<-zerocell+dxi
      dyi<-round(dy[i]/cellwidth)
      icelly<-zerocell+dyi
      if ((min(icellx,icelly)>0)&(max(icellx,icelly)<=ncell)) {
         nbin[icellx,icelly]<-nbin[icellx,icelly]+1
         vbin[icellx,icelly]<-vbin[icellx,icelly]+v[i]
         }
      }
   vbin<-vbin/nbin
   vbin[nbin==0]<-NA
   list(cloud=cbind(dx,dy,v),
      bin=list(x=cellwidth*((-zerocell):zerocell),
               y=cellwidth*((-zerocell):zerocell),
               z=vbin,n=nbin))
   }
#
# an isotropic variogram with two different extrapolations to the origin (Figure 10.11)
#
u<-vario$u
v<-vario$v
x1<-c(0.00000000,0.04638353,1.30000000,3.17727207,7.09088274,
12.56993767,20.39715901,27.44165821,36.05160168,47.00971155,
61.09870996,78.31859690,96.32120597,126.06464705,162.85258733,
192.59602841)
y1<-c(0.2050791,1.40206110,2.08774384,2.60000000,3.16098638,3.66779536,
4.08516746,4.38329039,4.62178874,4.85000000,5.00000000,5.06897313,
5.06897313,5.06897313,5.06897313,5.06807313)
x2<-c(0.00000,12.56994,20.39716,27.44166,36.05160,47.00971,61.09871,
78.31860,96.32121,126.06465,162.85259,192.59603)
y2<-c(2.500000,3.667795,4.085167,4.383290,4.621789,4.850000,5.000000,5.068973,
5.068973,5.068973,5.068973,5.068073)
pdf(file="varioextrapolated.pdf",height=6,width=8)
plot(u,v,pch=19,cex.lab=1.5,cex.axis=1.5,xlab="distance",ylab="variogram",xlim=c(0,100),ylim=c(0,7))
lines(x1,y1,lwd=2)
lines(x2,y2,lwd=2,lty=2)
dev.off()
#
# analysis of Galicia lead pollution data
#
XYZ<-read.table("../data/galicia_lead.data",header=T)
x<-XYZ[,1]
y<-XYZ[,2]
z<-XYZ[,3]
#
# histograms of untransformed and log-transformed values of lead concentration (Figure 10.12)
#
pdf(file="lead_histograms.pdf",height=6,width=8)
par(mfrow=c(1,2))
hist(z,xlab="lead concentration",ylab="frequency",cex.lab=1.5,cex.axis=1.5,lwd=2,main=" ")
hist(log(z),xlab="log-transformed lead concentration",ylab="frequency",cex.lab=1.5,cex.axis=1.5,lwd=2,main=" ")
dev.off()
#
# convert lead pollution data to a "geodata" object and plot as a map (Figure 10.3)
#
XYZ<-as.geodata(XYZ)
pdf(file="galicia_lead.pdf",height=6,width=8)
par(pty="s")
pointmap(XYZ$coords,pch=" ",xlim=c(450,700),ylim=c(4600,4850),xlab="X coordinate",ylab="Y coordinate",
   cex.axis=1.5,cex.lab=1.5)
coast<-read.table("../data/Galicia_boundary.data",header=T)/1000
polymap(coast,add=T)
scale<-0.4
symbols(XYZ$coords[,1],XYZ$coords[,2],circles=XYZ$data*scale,inches=F,add=T)
dev.off()
#
# compute directional variogram of untransformed lead pollution data (Figure 10.9)
#
cellwidth<-10
dmax<-100
vario.dir<-directional.variogram(x,y,z,cellwidth,dmax)
vmat<-vario.dir$bin$z
vmat2<-cbind(vmat[,11],0.5*(vmat[,1:10]+vmat[,12:21]))
pdf(file="galicia_directional_vario.pdf",height=6,width=8)
image.plot(10*((-10):10),10*(0:10),vmat2,xlab="u",ylab="v",xlim=c(-105,105),ylim=c(-5,105),
    zlim=c(0,15),asp=1,legend.shrink=1,legend.width=2,cex.lab=1.5,
    col=gray((0:15)/15),axis.args=list(cex.axis=1.5))
dev.off()
#
# compute isotropic variogram of untransformed lead pollution data (Figure 10.10)
#
vario<-variog(XYZ)
pdf(file="galicia_vario.pdf",height=6,width=8)
plot(vario,ylab="variogram",xlim=c(0,200),ylim=c(0,7),cex.lab=1.7,cex.axis=1.7,pch=19,cex=1.5)
dev.off()
#
# re-compute isotropic variogram of lead pollution after log-transformation (Figure 10.13)
#
pdf(file="galicia_variofit.pdf",height=6,width=8)
lead97<-as.geodata(cbind(x,y,z))
vario97<-variog(lead97,lambda=0)
plot(vario97,ylab="variogram",xlim=c(0,200),ylim=c(0,0.3),cex.lab=1.5,cex.axis=1.5,pch=19)
#
# looks compatible with exponential correlation function as provisional model
# get initial estimates for maximum likelihood fitting
#
variofit97<-variofit(vario97,ini=c(0.2,0.4),cov.model="mat",kappa=0.5,nugget=0.4) 
parameter estimates:
tausq sigmasq     phi 
 0.1536  0.0768  0.4 
# re-fit by maximum likelihood
#
ml97<-likfit(lead97,cov.model="mat",kappa=0.5,ini=c(0.0768,0.4),nugget=0.1536,lambda=0)
# re-run to check convergence
# likfit: estimated model parameters:
#    beta  tausq   sigmasq      phi 
#"1.5422" "0.0830" "0.1465" "19.3045" 
#
# likfit: maximised log-likelihood = -37.2
#
tausq<-ml97$tausq
sigmasq<-ml97$sigmasq
phi<-ml97$phi
vario.x<-(0:200)
vario97.y<-tausq+sigmasq*(1-exp(-vario.x/phi))
#
# add fitted thoeoretical variogram to existing plot (Figure 10.13)
#
lines(vario.x,vario97.y,lwd=2)
dev.off()
#
# read in digitised boiundary of Galicia and re-scale
#
Galicia<-read.table(file="../data/Galicia_boundary.data",header=T)
galicia.xy<-cbind(Galicia$V1+488000,Galicia$V2-16000)/1000
galicia.xy[,1]<-galicia.xy[,1]+0.05
#
# read approximating polygonal boundary
#
galicia.poly<-read.table("../data/Galicia_poly.data",header=T)
#
# set up fine grid of prediction locations for mapping predicted lead pollution surface
#
prediction.locations<- expand.grid(seq(450,700,l=51), seq(4600,4850,l=51))
#
# use ordinary kriging with maximum likelihood parameter estimates to compute
# predicted lead pollution surface (Figure 10.14)
#
k.control<-krige.control(obj.model=ml97,cov.model="matern",kappa=0.5,cov.pars=c(sigmasq,phi),
            nugget=tausq, lambda=0)
o.control<-output.control(simulations.predictive=T,quantile=c(0.25, 0.50, 0.75))
k.result<-krige.conv(lead97,locations=prediction.locations,borders=galicia.poly, 
krige=k.control,output=o.control)
range(k.result$predict) # 2.796807 7.822373
pdf("galicia_krige.pdf",paper="special",height=6,width=6)
par(pty="s")
image(k.result,xlab=" ",ylab=" ",zlim=c(2,10),cex.axis=1.5,cex.lab=1.5)
dev.off()
#
# compute set of three maps showing lower quartile, median and upper quartile of predicted
# lead pollution as each prediciton location (Figure 10.15)
#
k.lower<-k.result
k.lower$predict<-k.result$quantiles.simulations[,1]
k.median<-k.result
k.median$predict<-k.result$quantiles.simulations[,2]
k.upper<-k.result
k.upper$predict<-k.result$quantiles.simulations[,3]
range(c(k.lower$predict,k.median$predict,k.upper$predict)) #2.036979 9.297778
pdf("galicia_krige_quantiles.pdf",paper="special",height=4,width=12)
par(pty="s",mfrow=c(1,3))
image(k.lower,xlab=" ",ylab=" ",zlim=c(2,10),cex.axis=1.5,cex.lab=1.5)
image(k.median,xlab=" ",ylab=" ",zlim=c(2,10),cex.axis=1.5,cex.lab=1.5)
image(k.upper,xlab=" ",ylab=" ",zlim=c(2,10),cex.axis=1.5,cex.lab=1.5)
dev.off()
#
# compute predictive distribuiton of spatial maximum lead concentration and plot
# as histogram with 95% predictive limits
#
maxima<-apply(k.result$simulations, 2,max)
n<-length(maxima) # 1000
lower<-sort(maxima)[25]
upper<-sort(maxima)[975]
pdf(file="galicia_max.pdf",height=6,width=8)
hist(maxima,xlab="maximum lead concentration",ylab="frequency",main=" ",cex.lab=1.5,cex.axis=1.5)
lines(c(lower,lower),c(0,100),lwd=2,lty=2)
lines(c(upper,upper),c(0,100),lwd=2,lty=2)
dev.off()