Rcode: Loess and GAM


Three data sets from the faraway package

library(faraway)

par(mfrow=c(1,3))
data(faithful)
plot(waiting ~ eruptions, faithful,main="old Faithful")
data(exa)
plot (y ~ x, exa,main="Example A")
lines(m ~ x, exa)
data(exb)
plot(y ~ x, exb,main="Example B")
lines(m ~ x, exb)

Fit the three data sets using loess

par(mfrow=c(1,3))
plot(waiting ~ eruptions, faithful,col="gray", cex=0.5)
f=loess(waiting ~ eruptions, faithful)
i = order(faithful$eruptions)
lines(f$x[i],f$fitted[i], lwd=1.5, col="red")

plot(y ~ x, exa, col="gray", cex=0.5)
lines(exa$x,exa$m,lty=1)
f = loess(y ~ x,exa)
lines(f$x,f$fitted,lty=2, lwd=1.5, col="red")
f = loess(y ~ x, exa, span=0.22)
lines(f$x,f$fitted,lty=5, lwd=1.5, col="blue")

plot(y ~ x, exb, col="gray", cex=0.5)
lines(exb$x,exb$m, lty=1)
f =loess(y ~ x, exb)
lines(f$x,f$fitted,lty=2,lwd=1.5, col="red")
f = loess(y ~ x, exb,span=1)
lines(f$x,f$fitted,lty=5, lwd=1.5, col="blue")

Load the ozone data and fit a linear regression model for the ozone concentration O3

olm = lm(O3 ~ temp + ibh + ibt, ozone)
summary(olm)

Use the gam package

library(gam)

amgam = gam(O3 ~ lo(temp) + lo(ibh) + lo(ibt), data=ozone)
summary(amgam)

names(amgam)
amgam$deviance       ## residual sum of squares
amgam$null.deviance  ## total variation of y
1 - amgam$deviance/amgam$null.deviance   ##  R-square

The R-square is slightly better than the linear model, but gam uses more df. Let’s use F-test to compare the two models.

anova(olm, amgam, test="F")

The gam package uses a score test for predictors. However, the p-values are only approximated at best and should be viewed with some skepticism. It is generally better to refit the model without the predictor of interest and then construct the F-test.

amgamr = gam(O3 ~ lo(temp) + lo(ibh) + ibt, data=ozone)
anova(amgamr, amgam, test="F")

amgamr = gam(O3 ~ lo(temp) + lo(ibh), data=ozone)
anova(amgamr, amgam, test="F")

The residual plots also support that ibt may not be significant.

par(mfrow=c(2,3))
plot(amgam,residuals=TRUE,se=TRUE,cex=0.75, lwd=1.5, col="gray")
plot(amgam,se=TRUE,lwd=1.5)