# # A sample R session. # # Newcomers to R may find it helpful to run this file line-by-line to # get an appreciation of how R operates. If you are used to menu-driven # software, you may find that the combination of command-driven and # interactive working needs some mental re-adjustment initially. # # assign the integers 1 to 100 to a vector, with name x, and print # the first 5 values onto the screen x<-1:100 x[1:5] # simulate data from a quadratic regression model whose residuals are Normally # distributed with standard deviation 12 mu<-2+0.5*x+0.01*x*x z<-12*rnorm(100) y<-mu+z # display the first five (x,y) pairs (no assignment), with # y-values rounded to 3 decimal places cbind(x[1:5],round(y[1:5],3)) # draw a scatterplot of x against y plot(x,y) # customize the scatterplot using optional arguments to the # plot() function plot(x,y,pch=19,col="red",cex=0.5,main="plot of x vs y") # fit a linear regression model to the simulated data # and summarise the result fit1<-lm(y~x) summary(fit1) # list the names of the components of the R object that stores # information about the fitted model names(fit1) # these components are individually accessible using the $ sign to # indicate which component you want fit1$coef alpha<-fit1$coef[1] beta<-fit1$coef[2] # add the fitted regression line to the scatterplot lines(x,alpha+beta*x) # the plot now shows clearly that the linear model does not give # good fit to the data (of course!), so we now fit the (correct) # quadratic model and add the fitted quadratic curve xsq<-x*x fit2<-lm(y~x+xsq) alpha<-fit2$coef[1] beta<-fit2$coef[2] gamma<-fit2$coef[3] muhat<-alpha+beta*x+gamma*xsq lines(x,muhat,col="blue",lwd=2) # we'll now write our own function to fit and plot the # quadratic regression model quadfit<-function(x,y,plot.result=F,x.plot=x) { # Arguments: # x: values of the explanatory variable # y: values of the response variable # plot.result: if T (true), plot of data and fitted regression curve is produced, # if omitted plot is not produced because default is F (false) # if omitted # x.plot: values of the explanatroy variable usaed to draw the fitted # curve, if omitted uses same values as x, but sorted into # ascending order # Result: # A standard R model object, plus (if plot.tesult=T) a plot of data # and fitted regression curve drawn on the current plotting device xsq<-x*x fit<-lm(y~x+xsq) alpha<-fit$coef[1]; beta<-fit$coef[2]; gamma<-fit$coef[3] if (plot.result==T){ x.plot<-sort(x.plot) plot(x,y,cex=0.5, col="red",main= "data and quadratic fit") y.plot=alpha+beta*x.plot+gamma*x.plot*x.plot lines(x.plot,y.plot,lwd=2,col= "blue") } fit } # now use this function in default and non-default modes quadfit(x,y) # this writes the output to the screen but does not save it for future # use, nor does it produce a plot fit3<-quadfit(x,y,plot.result=T,x.plot=0.2*(0:500)) # the plotting option is now invoked by typing `plot.result=T' # (T for true), also typing `x.plot=0.2*(0:500))' gives a smoother curve # than the default would have done # other output is assigned rather than displayed - but can be displayed if you wish # note that fit2 and fit3 should be identical - check by displaying summaries summary(fit2) summary(fit3) # now quit R - you will be invited to save your work if you wish, in # which case it will be loaded when you next run the R program q()