--- title: "Smoothing Examples" author: "Luke Tierney" output: html_document --- ```{r global_options, include=FALSE} knitr::opts_chunk$set(collapse=TRUE) options(rgl.useNULL=TRUE) knitr::knit_hooks$set(webgl = rgl::hook_webgl) ``` ## Density Estimates Histogram and density estimates for the `geyser` data: ```{r} library(MASS) data(geyser) truehist(geyser$duration,nbin=25,col="lightgrey") lines(density(geyser$duration)) lines(density(geyser$duration,bw="SJ"), col="red") lines(density(geyser$duration,bw="SJ-dpi"), col="blue") ``` Animation using `tkrplot`: ```{r} source("http://www.stat.uiowa.edu/~luke/classes/STAT7400/examples/tkdens.R") ``` ```{r, eval = FALSE} tkdens(geyser$duration, tkrplot = TRUE) ``` Animation using `shiny` ```{r} source("http://www.stat.uiowa.edu/~luke/classes/STAT7400/examples/shinydens.R") ``` ```{r, eval = FALSE} shinyDens(geyser$duration) ``` Animation to illustrate variability (needs to be ajdusted for use in RStudio): ```{r, eval = FALSE} data(geyser, package = "MASS") g <- geyser$duration for (i in 1:1000) { g[sample(299,1)] <- geyser$duration[sample(299,1)] plot(density(g,bw="SJ"),ylim=c(0,1.2),xlim=c(0,6)) Sys.sleep(1/30) } ``` Hexagonal binning: ```{r} library(hexbin) geyser2 <- data.frame(as.data.frame(geyser[-1,]), pduration=geyser$duration[-299]) plot(hexbin(geyser2$pduration, geyser2$waiting)) ``` Two-dimensional kernel density estimate contours: ```{r} geyser2 <- data.frame(as.data.frame(geyser[-1,]), pduration=geyser$duration[-299]) ``` ```{r} kd1 <- with(geyser2, kde2d(pduration, waiting, n = 50, lims = c(0.5, 6, 40, 110))) contour(kd1, col = "grey", xlab = "Previous Duration", ylab = "waiting") with(geyser2, points(pduration, waiting, col = "blue")) ``` Using the `SJ` bandwidths: ```{r} kd2 <- with(geyser2, kde2d(pduration, waiting, n = 50, lims = c(0.5, 6, 40, 110), h= c(width.SJ(pduration), width.SJ(waiting)))) contour(kd2, xlab = "Previous Duration", ylab = "waiting") ``` ### `tkrplot` Examples ```{r, eval = FALSE} with(geyser2, tkdens2d(pduration, waiting, n = 50, lims = c(0.5, 6, 40, 110), bw = c(width.SJ(pduration), width.SJ(waiting)), tkrplot = TRUE)) ``` ```{r, eval = FALSE} with(geyser2, tkdens2d(pduration, waiting, n = 50, lims = c(0.5, 6, 40, 110), bw = c(width.SJ(pduration), width.SJ(waiting)), show = contour, tkrplot = TRUE)) ``` ```{r, eval = FALSE} with(geyser2, tkdens2d(pduration,waiting,n = 50, lims = c(0.5, 6, 40, 110), bw = c(width.SJ(pduration), width.SJ(waiting)), show = function(x) { image(x); contour(x, add = TRUE)}, tkrplot = TRUE)) ``` ### Surface View Using `rgl` ```{r} library(rgl) ``` ```{r, webgl = TRUE} with(geyser2, { N <- 100 kfit <- kde2d(pduration, waiting, n = N, lims = c(0.5, 6, 40, 110), h = c(width.SJ(pduration), width.SJ(waiting))) surface3d(1 : N, 1 : N, 0.35 * N * kfit$z / max(kfit$z), col = "green") }) ``` ### Using `shiny` ```{r, eval = FALSE} with(geyser2, shinyDens2d(pduration, waiting, n = 50, lims = c(0.5, 6, 40, 110), bw = c(width.SJ(pduration), width.SJ(waiting)), show = function(x) { image(x); contour(x, add = TRUE)})) ``` ### 3D Density Estimates ```{r} library(rgl) library(misc3d) ``` Some tri-variate normal data ```{r} x<-matrix(rnorm(9000),ncol=3) x[1001:2000,2]<-x[1001:2000,2]+4 x[2001:3000,3]<-x[2001:3000,3]+4 g<-expand.grid(x = seq(-4,8.5,len=30), y = seq(-4,8.5,len=30), z = seq(-4,8.5,len=30)) ``` Density estimates for two bandwidths ```{r} d <- kde3d(x[,1], x[,2], x[,3], 0.2, 30, c(-4,8.5))$d d2 <- kde3d(x[,1], x[,2], x[,3], 0.5, 30, c(-4,8.5))$d ``` 3D plot of data and contour for bw = 0.2 ```{r, webgl = TRUE} clear3d() points3d(x = x[,1], y = x[,2], z = x[,3],size=2, color="black") contour3d(d, 20 / 3000, seq(-4,8.5,len=30), seq(-4,8.5,len=30),seq(-4,8.5,len=30), color="red",alpha=0.5,add=TRUE) ``` 3D plot of data and contour for bw = 0.5 ```{r, webgl = TRUE} clear3d() points3d(x = x[,1], y = x[,2], z = x[,3],size=2, color="black") contour3d(d2, 20 / 3000, seq(-4,8.5,len=30), seq(-4,8.5,len=30),seq(-4,8.5,len=30), color="red",alpha=0.5,add=TRUE) ``` 3D plot of several contours for bw = 0.5 ```{r, webgl = TRUE} clear3d() contour3d(d2, c(10, 25, 40) / 3000, seq(-4,8.5,len=30), seq(-4,8.5,len=30), seq(-4,8.5,len=30), color=c("red", "green", "blue"), alpha=c(0.2, 0.5, 0.7),add=TRUE) ``` ## Kernel Smooths and Smoothing Splines ```{r} data(geyser, package = "MASS") geyser2 <- data.frame(as.data.frame(geyser[-1,]), pduration=geyser$duration[-299]) with(geyser2, plot(pduration,waiting)) with(geyser2, lines(lowess(pduration,waiting), col = "red")) with(geyser2, lines(supsmu(pduration,waiting), col = "blue")) with(geyser2, lines(ksmooth(pduration,waiting), col = "green")) with(geyser2, lines(smooth.spline(pduration,waiting), col = "orange")) ``` ### `tkrplot` Examples ```{r} source("http://www.stat.uiowa.edu/~luke/classes/STAT7400/examples/tkdens.R") ``` ```{r, eval = FALSE} with(geyser2, tkranim(function(v) { plot(pduration, waiting) lines(smooth.spline(pduration, waiting, spar = v)) }, 1, 0, 1.5)) ``` ```{r, eval = FALSE} with(geyser2, tkranim(function(v) { plot(pduration, waiting) lines(ksmooth(pduration, waiting, bandwidth = v)) }, 1, 0, 5)) ``` ```{r, eval = FALSE} with(geyser2, tkranim(function(v) { plot(pduration, waiting) lines(lowess(pduration, waiting, f = v)) }, 0.5, 0.05, 1)) ``` ### `shiny` Examples ```{r, eval = FALSE} with(geyser2, shinyAnim(function(v) { plot(pduration, waiting) lines(smooth.spline(pduration, waiting, spar = v)) }, 1, 0, 1.5)) ``` ```{r, eval = FALSE} with(geyser2, shinyAnim(function(v) { plot(pduration, waiting) lines(lowess(pduration, waiting, f = v)) }, 2/3, 0.01, 1)) ``` ```{r, eval = FALSE} with(geyser2, shinyAnim(function(v) { plot(pduration, waiting) lines(ksmooth(pduration, waiting, bandwidth = exp(v) - 1)) }, 0.5, 0.01, 2.5)) ``` ### Fit Using `SemiPar` ```{r} library(MASS) geyser2 <- data.frame(as.data.frame(geyser[-1,]), pduration=geyser$duration[-299]) library(SemiPar) attach(geyser2) # needed because of flaws in spm implementation summary(spm(waiting ~ f(pduration))) plot(spm(waiting ~ f(pduration)), ylim = range(waiting)) points(pduration, waiting) ``` ### Fit Using `mgcv` ```{r} library(mgcv) gam.fit <- gam(waiting ~ s(pduration), data = geyser2) summary(gam.fit) plot(gam.fit) with(geyser2, points(pduration, waiting - mean(waiting))) ``` ### Smoothing with Multiple Predictors Scallop catches with `SemiPar`: ```{r} library(SemiPar) data(scallop) attach(scallop) log.catch <- log(tot.catch + 1) fit <- spm(log.catch ~ f(longitude, latitude)) summary(fit) ``` ```{r} ##plot(fit) lon <- seq(-73.7, -71.7, len = 50) lat <- seq(38.6, 41.0, len = 50) g <- expand.grid(longitude = lon, latitude = lat) z <- matrix(predict(fit, g), 50) persp(z = z, x = lon, y = lat) contour(z = z, x = lon, y = lat) points(x = scallop$longitude, y = scallop$latitude) ``` ```{r, webgl = TRUE} library(rgl) clear3d() surface3d(x = lon, y = lat, z = scale(z), color = "red") ``` With `mgcv`: ```{r} library(mgcv) scallop.gam <- gam(log.catch ~ s(longitude, latitude), data = scallop) summary(scallop.gam) plot(scallop.gam) ``` ```{r, webgl = TRUE} zz <- predict(scallop.gam, g) clear3d() surface3d(x = lon, y = lat, z = scale(zz), color = "red") ```