Rcode: Smoothing Splines


Load necessary libraries and check the help file.

library(ggplot2)
library(splines)
help(smooth.spline)

Generate data from a simulated example.

n=30; err = 1;
x = runif(n); x=sort(x);
y = sin(12*(x+0.2))/(x+0.2) + rnorm(n, 0, err);
plot(x,y, col="red");

fx=1:100/100;                   ## a dense grid
fy = sin(12*(fx+0.2))/(fx+0.2)
lines(fx, fy, col=8, lwd=2, ylab="")

Fit smoothing splines with various degree-of-freedoms (dfs)

ss5 = predict(smooth.spline(x, y, df=5),fx)
names(ss5)

## predict returns two columns, x" and "y", and we only 
## need the "y" column
ss5 = ss5$y
ss8 = predict(smooth.spline(x, y, df=8),fx)$y
ss15 = predict(smooth.spline(x, y, df=15),fx)$y

Plot the fitted curves using ggplot

## first create a dataframe for ggplot
tmpdata = data.frame(cbind(x=fx, f=fy, 
          ss5=ss5,ss8=ss8,ss15=ss15))

baseplot = ggplot(tmpdata, aes(x=x)) + ylab("") + xlab("x")

baseplot + geom_line(aes(y=f), color='black', size=1.2) + 
  geom_line(aes(y=ss5, colour="df5"), linetype=2, size=1.1) + 
  geom_line(aes(y=ss8, colour="df8"), linetype=2, size=1.1) + 
  geom_line(aes(y=ss15, colour="df15"), linetype=2, size=1.1) + 
  scale_colour_manual("", 
                      breaks = c("df5", "df8", "df15"),
                      values = c("red", "green", "blue")) +
  geom_point(data=data.frame(cbind(x, y)), aes(x=x, y=y), size=3)

Here is how we obtain the Demmler & Reinsch (DR) Basis: we first obtain the smoother matrix S (which is not returned by R, so we write our own script to compute it), and then the eigen-vectors of S are basically the DR basis functions.

smooth.matrix = function(x, df){
## return the smoother matrix with knots x and df
## this function is for x having unique values
 n = length(x);
 A = matrix(0, n, n);
 for(i in 1:n){
       y = rep(0, n); y[i]=1;
       yi = smooth.spline(x, y, df=df)$y;
       A[,i]= yi;
}
 return(A)
}
[code]

[code lang="r"]
S4 = smooth.matrix(x, df=4);
S9 = smooth.matrix(x, df=9);
tmp=ns(x, df=9, intercept=TRUE)
H9 = tmp%*%solve(t(tmp)%*%tmp)%*%t(tmp);

## get the eigen value and eigen vector of the 
## smoother/projection matrix
eigen.S4 = eigen(S4);
eigen.S9 = eigen(S9);
eigen.H9 = eigen(H9);

v4 = eigen.S4$ve;
v9=  eigen.S9$ve;
dim(v4)
dim(v9)

Plot the eigen-vectors, i.e., columns of v4/v9, which are the DR basis functions (up to a sign flip).

par(mfrow=c(3,4));
for(i in 1:12) {
  plot(c(min(x), max(x)),c(min(v4, v9), max(v4, v9)), 
       xlab="x", ylab="", type="n");
  lines(x, v4[,i], col=2,lty=1, lwd=2.5);
  lines(x, v9[,i], col=3, lty=2, lwd=2.5);
}

Plot the eigen values: note that the first two eigen values are always 1, since smoothing-splines do not penalize the linear terms (which correspond to a two-dimensional subspaces therefore have two eigen-values).

plot(eigen.S4$va, pch=1, col="red", 
    xlab='',ylab='eigen values', cex=1.5);
points(eigen.S9$va, pch=4, col="blue", cex=1.5);
points(eigen.H9$va, pch=5, col="black", cex=1.5);
lines(c(0,n), c(1, 1), col=8, lty=2, lwd=1.5);
legend("topright", pch=c(1,4,5), col=c("red", "blue", "black"), 
     legend=c("SS with df=4", "SS with df=9", "NCS with df=9"))

Use CV and GCV to select the degree-of-freedom. Note that smoothing splines could have fractional dimensions (or dfs)

myfit = smooth.spline(x, y);  ## Default: GCV
names(myfit)
myfit$df
myfit$cv.crit

## set 'cv=TRUE' for cross-validation 
myfit=smooth.spline(x, y, cv=TRUE)