--- title: "Parallel Coordinate Plots" output: html_document: toc: yes --- ```{r global_options, include = FALSE} knitr::opts_chunk\$set(collapse = TRUE) ``` ```{r, include = FALSE} library(lattice) library(tidyverse) library(GGally) set.seed(12345) ``` The same idea as a slope graph, but usually with more variables. Some references: - A [post](https://eagereyes.org/techniques/parallel-coordinates) by Robert Kosara. - [Wikipedia entry](https://en.wikipedia.org/wiki/Parallel_coordinates) - [Paper](http://www.ifs.tuwien.ac.at/~mlanzenberger/teaching/ps/ws07/stuff/00146402.pdf) on recognizing mathematical objects in parallel coordinate plots. Some R implementations: Function Package -------- ------- `parallelplot` `lattice` `ggparcoord` `GGally` `ipcp` `iplots` `ggobi` `rggobi` `parcoords` [`parcoords`](http://www.buildingwidgets.com/blog/2015/1/30/week-04-interactive-parallel-coordinates-1) (on [GitHub](https://github.com/timelyportfolio/parcoords); uses D3) ## Australian Crabs in Parallel Coordinates Using the `crabs` data from `MASS`: ```{r, eval = TRUE} library(GGally) data(crabs, package = "MASS") ggparcoord(crabs) ``` Focus only on the measurements: ```{r, eval = TRUE} ggparcoord(crabs, columns = 4:8) ``` Color by `sex`: ```{r, eval = TRUE} ggparcoord(crabs, columns = 4:8, groupColumn = "sex") ``` Color by `sp`: ```{r, eval = TRUE} ggparcoord(crabs, columns = 4:8, groupColumn = "sp") ``` After scaling by `CL`: ```{r, eval = TRUE} cr <- mutate(crabs, FLCL = FL/CL, RWCL = RW/CL, CWCL = CW/CL, BDCL = BD/CL) ggparcoord(cr, columns = 9:12, groupColumn="sp") ``` Reorder the variables: ```{r, eval = TRUE} ggparcoord(cr, columns = c(10, 9, 11, 12), groupColumn = "sp") ``` Reorder again: ```{r, eval = TRUE} ggparcoord(cr, columns = c(10, 9, 12, 11), groupColumn = "sp") ``` Reverse the `CWCL` variable: ```{r, eval = TRUE} ggparcoord(mutate(cr, CWCL = -CWCL), columns = c(10, 9, 12, 11), groupColumn = "sp") ``` - The patterns for `FLCL`, `CWLC`, and `BDCL` for the two species differ. - This corresponds to the discriminator `FLCL + BDCL - CWCL` found with scatter plots ## Olive Oils in Parallel Coordinates ```{r, eval = TRUE} data(olive, package = "tourr") olive\$Region <- factor(olive\$region, labels = c("North", "South", "Sardinia")) ggparcoord(olive, groupColumn = "Region", columns = 3 : 8) ``` `South` is separated out by high values of `eicosenoic` Look at the other regions: ```{r, eval = TRUE} ons <- filter(olive, Region != "South") ons <- droplevels(ons) ggparcoord(ons, groupColumn="Region", columns = 3:10) ``` `linoleic` seems to allow some separation of `North` and `Sardinia` Rearrange to place `linoleic` next to `arachidic`: ```{r, eval = TRUE} ggparcoord(ons, groupColumn="Region", columns = c(3:7, 9, 8, 10)) ``` This shows the joint discriminator found with scatter plots. ## Interactive Approaches Interactive version in `iplots`: ```{r, eval = FALSE} library(iplots) ipcp(cr) ``` ```{r, eval = FALSE} ipcp(cr[-(3:8)]) ``` ```{r, eval = FALSE} ipcp(cr[c(1, 2, 10, 9, 12, 11)]) ``` Interactive version in `rggobi`: ```{r, eval = FALSE} library(rggobi) ggobi(cr) ``` Interactive version using the D3 library via the `parcoords` package: ```{r, include = FALSE} data(crabs, package = "MASS") cr <- mutate(crabs, FLCL = FL/CL, RWCL = RW/CL, CWCL = CW/CL, BDCL = BD/CL) ``` ```{r} parcoords::parcoords(cr[c(1, 2, 9:12)], , rownames = FALSE, reorder = TRUE, brushMode="1D", color = list( colorScale = htmlwidgets::JS('d3.scale.category10()'), colorBy = "sp")) ``` ## Some Calibration Examples ```{r} x <- rnorm(100) d1 <- data.frame(x1 = x, x2 = rnorm(x), x3 = x) d2 <- mutate(d1, x3 = -x) ``` ```{r, eval = TRUE} ggparcoord(d1) library(lattice) parallelplot(d1) parallelplot(d1, horizontal.axis = FALSE) ``` Mostly parallel lines indicate positive association: ```{r, eval = TRUE} ggparcoord(d1[c(1, 3, 2)]) ``` Near intersection in a point indicates negative association: ```{r, eval = TRUE} ggparcoord(d2) ggparcoord(d2[c(1, 3, 2)]) ``` A quadratic relationship: ```{r, eval = TRUE} ggparcoord(mutate(d2, x3 = x1 ^ 2)[c(1, 3, 2)]) ``` ```{r, eval = FALSE, include = FALSE} n <- 1000 s <- 0.3 x1 <- rnorm(n) d <- data.frame(x1, x2 = x1 + rnorm(n, 0, s), x3 = x1 + rnorm(n, 0, s)) ggparcoord(d) ggparcoord(d, alpha = 0.1) splom(d) ggparcoord(mutate(d, x3 = -x3), alpha = 0.1) splom(mutate(d, x2 = rnorm(n), x3 = -x3)) d2 <- mutate(d, x2 = rnorm(n), x3 = -x3) ggparcoord(d2) ggparcoord(d2, alpha = 0.1) ggparcoord(d2, alpha = 0.1, columns=c(1,3,2)) parallelplot(d2) parallelplot(d2, horizontal = TRUE) parallelplot(d2, horizontal = FALSE) parallelplot(d2, horizontal = FALSE, group = d2\$x1 > 0) library(iplots) ipcp(d) ipcp(d2) ``` ## Diamonds Data Using a sample of 5000 observations (about 10%) and `parallelplot` from `lattice`: ```{r, eval = TRUE} library(ggplot2) ds <- diamonds[sample(nrow(diamonds), 5000),] parallelplot(~ds, group = cut, data = ds, horizontal.axis = FALSE, auto.key = TRUE) parallelplot(~ds, group = cut, data = ds, horizontal.axis = FALSE, auto.key = TRUE, panel = function(...) { panel.parallel(...) levs <- levels(ds\$cut) panel.text(2, seq(0, 1, len = length(levs)), levs) }) ``` Rearrange variables: ```{r, eval = TRUE} ds1 <- select(ds, cut, carat, price, x, y, z) parallelplot(~ds1, group = cut, data = ds1, horizontal.axis = FALSE, auto.key = TRUE, panel = function(...) { panel.parallel(...) levs <- levels(ds\$cut) panel.text(2, seq(0, 1, len = length(levs)), levs) }) ``` Conditioning on `cut`: ```{r, eval = TRUE} dsnc <- select(ds, -cut) parallelplot(~ dsnc | cut, data = ds, horizontal.axis = FALSE, scales = list(x = list(rot = 45))) parallelplot(~dsnc | cut, data = ds, col = "black") parallelplot(~dsnc | cut, data = ds, col = "black", alpha = 0.05) ``` Rearrange variables: ```{r, eval = TRUE} ds1nc <- select(ds1, -cut) parallelplot(~ ds1nc | cut, data = ds1, col = "black", alpha = 0.05) ``` Variations using `ggparcoords` and a smaller sample: ```{r, eval = TRUE} ds <- diamonds[sample(nrow(diamonds), 500),] ggparcoord(ds, scale = "uniminmax", groupColumn = "cut") ggparcoord(ds, scale = "uniminmax", groupColumn = "cut", columns = c(1, 3:10)) ``` ```{r, eval = TRUE} ds1 <- mutate(ds, ncut = as.numeric(cut)) ggparcoord(ds1, scale = "uniminmax", groupColumn = "cut", columns = c(1, 3:11)) ``` Using separate facets for the `cut` levels: ```{r, eval = TRUE} ggparcoord(ds, scale = "uniminmax", columns = c(1, 3:10)) + facet_wrap(~ ds\$cut) + coord_flip() ``` Adding box plots and violin plots: ```{r, eval = TRUE} ggparcoord(ds, scale = "uniminmax", columns = c(1, 3:10), alphaLines = 0.1, boxplot = TRUE) + facet_wrap(~ ds\$cut) + coord_flip() ``` ```{r, eval = TRUE} ggparcoord(ds, scale = "uniminmax", columns=c(1, 3:10), alphaLines = 0.1) + geom_boxplot(aes_string(group = "variable"), width = 0.3, outlier.color = NA) + facet_wrap(~ds\$cut) + coord_flip() ``` ```{r, eval = TRUE} ggparcoord(ds, scale = "uniminmax", columns=c(1, 3:10), alphaLines = 0.1) + geom_violin(aes_string(group = "variable"), width = 0.5) + facet_wrap(~ds\$cut) + coord_flip() ``` ## Useful Adjustments and Additions Useful adjustments: - vary axis scaling; - show axis labels; - alpha blending; - color by category or range; - filter/hide; - reorder axes; - reverse axes; - reorder categories; An interactive implementation should ideally support all of these. Another useful feature is to be able to record the adjustments made.