# # This file contains R commands that reproduce the analyses # presented in Chapter 10 # # load required packages # library(splancs) library(fields) library(geoR) # # commands to create Figure 10.4 # x<-rep(1:20,20) y<-c(matrix(x,20,20,T)) pdf(file="shifted_lattice.pdf",height=6,width=8) par(pty="s") plot(x,y,xlab=" ",ylab=" ",cex.axis=1.5,xlim=c(1,20),ylim=c(1,20), type="n") take<-(x<=17)&(y<=15) pointmap(cbind(x[take],y[take]),pch=1,add=T) take<-(x>3)&(y>5) pointmap(cbind(x[take],y[take]),pch=1,add=T) rectangle<-cbind(c(0.5,17.5,17.5,0.5),c(0.5,0.5,15.5,15.5)) polymap(rectangle,add=T) rectangle[,1]<-rectangle[,1]+3 rectangle[,2]<-rectangle[,2]+5 polymap(rectangle,add=T) lines(c(1,4),c(1,6),lty=2,lwd=2) lines(c(12,15),c(7,12),lty=2,lwd=2) lines(c(10,13),c(14,19),lty=2,lwd=2) dev.off() # # spatial correlogram of wheat uniformity trial data (Figure 10.5) # wheat<-read.table("../data/mercerhall.data",header=T) yield<-wheat[,3] mean.yield<-mean(yield) var.yield<-var(yield) yield<-matrix(yield,25,20,byrow=T) z<-(yield-mean.yield)/sqrt(var.yield) spatial.corr<-matrix(0,21,11) for (i in 0:10) { for (j in 0:10) { x<-z[(1:(25-i)),(1:(20-j))] y<-z[((1+i):25),((1+j):20)] spatial.corr[10+i+1,j+1]<-sum(x*y)/((25-i)*(20-j)) x<-z[((1+i):25),(1:(20-j))] y<-z[(1:(25-i)),((1+j):20)] spatial.corr[10-i+1,j+1]<-sum(x*y)/((25-i)*(20-j)) } } spatial.corr[11,1]<-NA range(spatial.corr,na.rm=T) # pdf(file="spatial_correlation.pdf",height=6,width=8) par(pty="s") image.plot((-10):10,0:10,spatial.corr,xlab="r",ylab="s",xlim=c(-10.5,10.5),ylim=c(-0.5,10.5), zlim=c(-0.3,0.6),asp=1,legend.shrink=1,legend.width=2,cex.lab=1.5, col=gray((0:20)/20),axis.args=list(cex.axis=1.5)) #col=gray((0:20)/20), dev.off() # # Lattice-based variogram of the wheat yield data (Figure 10.6) # variogram<-matrix(0,21,11) for (i in 0:10) { for (j in 0:10) { x<-z[(1:(25-i)),(1:(20-j))] y<-z[((1+i):25),((1+j):20)] variogram[10+i+1,j+1]<-0.5*sum((x-y)^2)/((25-i)*(20-j)) x<-z[((1+i):25),(1:(20-j))] y<-z[(1:(25-i)),((1+j):20)] variogram[10-i+1,j+1]<-0.5*sum((x-y)^2)/((25-i)*(20-j)) } } variogram[11,1]<-NA range(variogram,na.rm=T) # pdf(file="variogram.pdf",height=6,width=8) par(pty="s") image.plot((-10):10,0:10,variogram,xlab="u",ylab="v",xlim=c(-10.5,10.5),ylim=c(-0.5,10.5), zlim=c(0.5,1.3),asp=1,legend.shrink=1,legend.width=2,cex.lab=1.5, col=gray((0:20)/20),axis.args=list(cex.axis=1.5)) lines(c(-10.5,10.5,10.5,-10.5,-10.5),c(-0.5,-0.5,10.5,10.5,-0.5)) dev.off() # # non-isotropic variogram cloud and binned version, simulated data (Figure 10.7) # set.seed(293745) k<-4 n<-k*k frac<-0.15 alpha<-0.5 beta<-0.5 x<-(c(matrix(1:k,k,k,T))/k)+frac*(runif(n)-0.5)-0.5/k y<-(c(matrix(1:k,k,k,F))/k)+frac*(runif(n)-0.5)-0.5/k z<-1+runif(n)+alpha*x+beta*y dx<-NULL dy<-NULL u<-NULL v<-NULL ivec<-NULL jvec<-NULL for (i in 1:n) { for (j in 1:n) { if (i!=j) { dx<-c(dx,x[i]-x[j]) dy<-c(dy,y[i]-y[j]) u<-c(u,sqrt((x[i]-x[j])^2+(y[i]-y[j])^2)) v<-c(v,0.5*(z[i]-z[j])^2) ivec<-c(ivec,i) jvec<-c(jvec,j) } } } pdf(file="cloud_and_bin.pdf",height=6,width=8) par(mfrow=c(1,2),pty="s") plot(x,y,type="n",xlim=c(0,1),ylim=c(0,1),cex.axis=1.5,cex.lab=1.5) scalez<-0.02 scalev<-0.08 symbols(x,y,circles=scalez*z,inches=F,add=T) plot(dx,dy,type="n",xlim=c(-1,1),ylim=c(-1,1),cex.axis=1.5,cex.lab=1.5) symbols(dx,dy,circles=scalev*v,inches=F,add=T) binwidth<-1/k k1<-k-1 for (i in (-k1:k)) { lines(rep((i-0.5)*binwidth,2),c(-1,1),lty=2) } for (j in (-k1:k)) { lines(c(-1,1),rep((j-0.5)*binwidth,2),lty=2) } dev.off() # # isotropic variogram cloud and binned version, simulated data (Figure 10.8) # pdf(file="isotropic_cloud_and_bin.pdf",height=6,width=8) plot(u,v,pch=19,cex.axis=1.5,cex.lab=1.5,xlim=c(0,1.25)) for (i in 0:(k+1)) { lines(rep(i*binwidth,2),c(0,max(v)),lty=2) } dev.off() # # directional variogram function # directional.variogram<-function(x,y,z,cellwidth,dmax) { # # Arguments: # x,y: spatial coordinates of data-values # z: data-values # cellwidth: width of binning interval in each coordinate direction (square bins) # dmax: maximum separatioon in each coordinate direction for calculation of variogram # # Result: list with elements # $cloud: three-dimensional variogram cloud # $bin: binned version of variogram cloud # n<-length(x) zerocell<-1+ceiling((dmax-0.000001)/cellwidth) ncell<-2*zerocell-1 vbin<-matrix(0,ncell,ncell) nbin<-matrix(0,ncell,ncell) dx<-NULL dy<-NULL u<-NULL v<-NULL ivec<-NULL jvec<-NULL for (i in 1:n) { for (j in 1:n) { if (i!=j) { dx<-c(dx,x[i]-x[j]) dy<-c(dy,y[i]-y[j]) u<-c(u,sqrt((x[i]-x[j])^2+(y[i]-y[j])^2)) v<-c(v,0.5*(z[i]-z[j])^2) ivec<-c(ivec,i) jvec<-c(jvec,j) } } } N<-length(dx) for (i in 1:N) { dxi<-round(dx[i]/cellwidth) icellx<-zerocell+dxi dyi<-round(dy[i]/cellwidth) icelly<-zerocell+dyi if ((min(icellx,icelly)>0)&(max(icellx,icelly)<=ncell)) { nbin[icellx,icelly]<-nbin[icellx,icelly]+1 vbin[icellx,icelly]<-vbin[icellx,icelly]+v[i] } } vbin<-vbin/nbin vbin[nbin==0]<-NA list(cloud=cbind(dx,dy,v), bin=list(x=cellwidth*((-zerocell):zerocell), y=cellwidth*((-zerocell):zerocell), z=vbin,n=nbin)) } # # an isotropic variogram with two different extrapolations to the origin (Figure 10.11) # u<-vario$u v<-vario$v x1<-c(0.00000000,0.04638353,1.30000000,3.17727207,7.09088274, 12.56993767,20.39715901,27.44165821,36.05160168,47.00971155, 61.09870996,78.31859690,96.32120597,126.06464705,162.85258733, 192.59602841) y1<-c(0.2050791,1.40206110,2.08774384,2.60000000,3.16098638,3.66779536, 4.08516746,4.38329039,4.62178874,4.85000000,5.00000000,5.06897313, 5.06897313,5.06897313,5.06897313,5.06807313) x2<-c(0.00000,12.56994,20.39716,27.44166,36.05160,47.00971,61.09871, 78.31860,96.32121,126.06465,162.85259,192.59603) y2<-c(2.500000,3.667795,4.085167,4.383290,4.621789,4.850000,5.000000,5.068973, 5.068973,5.068973,5.068973,5.068073) pdf(file="varioextrapolated.pdf",height=6,width=8) plot(u,v,pch=19,cex.lab=1.5,cex.axis=1.5,xlab="distance",ylab="variogram",xlim=c(0,100),ylim=c(0,7)) lines(x1,y1,lwd=2) lines(x2,y2,lwd=2,lty=2) dev.off() # # analysis of Galicia lead pollution data # XYZ<-read.table("../data/galicia_lead.data",header=T) x<-XYZ[,1] y<-XYZ[,2] z<-XYZ[,3] # # histograms of untransformed and log-transformed values of lead concentration (Figure 10.12) # pdf(file="lead_histograms.pdf",height=6,width=8) par(mfrow=c(1,2)) hist(z,xlab="lead concentration",ylab="frequency",cex.lab=1.5,cex.axis=1.5,lwd=2,main=" ") hist(log(z),xlab="log-transformed lead concentration",ylab="frequency",cex.lab=1.5,cex.axis=1.5,lwd=2,main=" ") dev.off() # # convert lead pollution data to a "geodata" object and plot as a map (Figure 10.3) # XYZ<-as.geodata(XYZ) pdf(file="galicia_lead.pdf",height=6,width=8) par(pty="s") pointmap(XYZ$coords,pch=" ",xlim=c(450,700),ylim=c(4600,4850),xlab="X coordinate",ylab="Y coordinate", cex.axis=1.5,cex.lab=1.5) coast<-read.table("../data/Galicia_boundary.data",header=T)/1000 polymap(coast,add=T) scale<-0.4 symbols(XYZ$coords[,1],XYZ$coords[,2],circles=XYZ$data*scale,inches=F,add=T) dev.off() # # compute directional variogram of untransformed lead pollution data (Figure 10.9) # cellwidth<-10 dmax<-100 vario.dir<-directional.variogram(x,y,z,cellwidth,dmax) vmat<-vario.dir$bin$z vmat2<-cbind(vmat[,11],0.5*(vmat[,1:10]+vmat[,12:21])) pdf(file="galicia_directional_vario.pdf",height=6,width=8) image.plot(10*((-10):10),10*(0:10),vmat2,xlab="u",ylab="v",xlim=c(-105,105),ylim=c(-5,105), zlim=c(0,15),asp=1,legend.shrink=1,legend.width=2,cex.lab=1.5, col=gray((0:15)/15),axis.args=list(cex.axis=1.5)) dev.off() # # compute isotropic variogram of untransformed lead pollution data (Figure 10.10) # vario<-variog(XYZ) pdf(file="galicia_vario.pdf",height=6,width=8) plot(vario,ylab="variogram",xlim=c(0,200),ylim=c(0,7),cex.lab=1.7,cex.axis=1.7,pch=19,cex=1.5) dev.off() # # re-compute isotropic variogram of lead pollution after log-transformation (Figure 10.13) # pdf(file="galicia_variofit.pdf",height=6,width=8) lead97<-as.geodata(cbind(x,y,z)) vario97<-variog(lead97,lambda=0) plot(vario97,ylab="variogram",xlim=c(0,200),ylim=c(0,0.3),cex.lab=1.5,cex.axis=1.5,pch=19) # # looks compatible with exponential correlation function as provisional model # get initial estimates for maximum likelihood fitting # variofit97<-variofit(vario97,ini=c(0.2,0.4),cov.model="mat",kappa=0.5,nugget=0.4) parameter estimates: tausq sigmasq phi 0.1536 0.0768 0.4 # re-fit by maximum likelihood # ml97<-likfit(lead97,cov.model="mat",kappa=0.5,ini=c(0.0768,0.4),nugget=0.1536,lambda=0) # re-run to check convergence # likfit: estimated model parameters: # beta tausq sigmasq phi #"1.5422" "0.0830" "0.1465" "19.3045" # # likfit: maximised log-likelihood = -37.2 # tausq<-ml97$tausq sigmasq<-ml97$sigmasq phi<-ml97$phi vario.x<-(0:200) vario97.y<-tausq+sigmasq*(1-exp(-vario.x/phi)) # # add fitted thoeoretical variogram to existing plot (Figure 10.13) # lines(vario.x,vario97.y,lwd=2) dev.off() # # read in digitised boiundary of Galicia and re-scale # Galicia<-read.table(file="../data/Galicia_boundary.data",header=T) galicia.xy<-cbind(Galicia$V1+488000,Galicia$V2-16000)/1000 galicia.xy[,1]<-galicia.xy[,1]+0.05 # # read approximating polygonal boundary # galicia.poly<-read.table("../data/Galicia_poly.data",header=T) # # set up fine grid of prediction locations for mapping predicted lead pollution surface # prediction.locations<- expand.grid(seq(450,700,l=51), seq(4600,4850,l=51)) # # use ordinary kriging with maximum likelihood parameter estimates to compute # predicted lead pollution surface (Figure 10.14) # k.control<-krige.control(obj.model=ml97,cov.model="matern",kappa=0.5,cov.pars=c(sigmasq,phi), nugget=tausq, lambda=0) o.control<-output.control(simulations.predictive=T,quantile=c(0.25, 0.50, 0.75)) k.result<-krige.conv(lead97,locations=prediction.locations,borders=galicia.poly, krige=k.control,output=o.control) range(k.result$predict) # 2.796807 7.822373 pdf("galicia_krige.pdf",paper="special",height=6,width=6) par(pty="s") image(k.result,xlab=" ",ylab=" ",zlim=c(2,10),cex.axis=1.5,cex.lab=1.5) dev.off() # # compute set of three maps showing lower quartile, median and upper quartile of predicted # lead pollution as each prediciton location (Figure 10.15) # k.lower<-k.result k.lower$predict<-k.result$quantiles.simulations[,1] k.median<-k.result k.median$predict<-k.result$quantiles.simulations[,2] k.upper<-k.result k.upper$predict<-k.result$quantiles.simulations[,3] range(c(k.lower$predict,k.median$predict,k.upper$predict)) #2.036979 9.297778 pdf("galicia_krige_quantiles.pdf",paper="special",height=4,width=12) par(pty="s",mfrow=c(1,3)) image(k.lower,xlab=" ",ylab=" ",zlim=c(2,10),cex.axis=1.5,cex.lab=1.5) image(k.median,xlab=" ",ylab=" ",zlim=c(2,10),cex.axis=1.5,cex.lab=1.5) image(k.upper,xlab=" ",ylab=" ",zlim=c(2,10),cex.axis=1.5,cex.lab=1.5) dev.off() # # compute predictive distribuiton of spatial maximum lead concentration and plot # as histogram with 95% predictive limits # maxima<-apply(k.result$simulations, 2,max) n<-length(maxima) # 1000 lower<-sort(maxima)[25] upper<-sort(maxima)[975] pdf(file="galicia_max.pdf",height=6,width=8) hist(maxima,xlab="maximum lead concentration",ylab="frequency",main=" ",cex.lab=1.5,cex.axis=1.5) lines(c(lower,lower),c(0,100),lwd=2,lty=2) lines(c(upper,upper),c(0,100),lwd=2,lty=2) dev.off()