---
#############################################################
#                                                           #
# In RStudio click on "Run Document" to run this tutorial   #
#                                                           #
#############################################################
title: "Visualizing Amounts"
author: "Luke Tierney"
output: learnr::tutorial
runtime: shiny_prerendered
---

```{r setup, include = FALSE}
library(learnr)
library(tidyverse)
knitr::opts_chunk$set(echo = FALSE, comment = "", warning = FALSE)
# The two chunks referenced here are displayed with eval = FALSE further
# down; pulling their code in via knitr chunk references attaches the
# packages and defines `avgLE` when the document actually runs.
<<load-packages>>
<<prepare-data>>
```

```{r stop_when_browser_closes, context = "server"}
# stop the app when the browser is closed (or, unfortunately, refreshed)
session$onSessionEnded(stopApp)
```

## Average Life Expectancy by Continent

These exercises will use `gapminder` data:

```{r load-packages, echo = TRUE, eval = FALSE}
library(ggplot2)
library(dplyr)
library(gapminder)
```

In particular, you will look at average life expectancy values for the
continents in each of the years covered by the data set:

```{r prepare-data, echo = TRUE, eval = FALSE}
avgLE <- group_by(gapminder, continent, year) %>%
  summarize(avgLifeExp = mean(lifeExp)) %>%
  ungroup()
```

## Some Dot Plots

Start by creating a dot plot for the average life expectancy values
for the different continents in 2007. Show the continent values in
increasing order.

```{r avgLE-2007, exercise = TRUE}

```

```{r avgLE-2007-solution}
# reorder() sorts the continent levels by avgLifeExp, so the dots
# appear in increasing order along the y axis
filter(avgLE, year == 2007) %>%
  mutate(continent = reorder(continent, avgLifeExp)) %>%
  ggplot(aes(x = avgLifeExp, y = continent)) +
  geom_point()
```

Next, show the values for both 1952 and 2007, using color to identify
the years. Increasing the point size will make the colors easier to see.

```{r avgLE-1952-2007, exercise = TRUE}

```

```{r avgLE-1952-2007-solution}
# year becomes a factor so that color is a discrete scale with one
# legend entry per year
filter(avgLE, year == 2007 | year == 1952) %>%
  mutate(continent = reorder(continent, avgLifeExp),
         year = factor(year)) %>%
  ggplot(aes(x = avgLifeExp, y = continent, color = year)) +
  geom_point(size = 3)
```

A _dumbbell_ chart can emphasize the sizes of the changes from 1952 to 2007.
Modify your chart to show a dumbbell chart.

```{r aveLE-db, exercise = TRUE}

```

```{r aveLE-db-solution}
filter(avgLE, year == 2007 | year == 1952) %>%
  mutate(continent = reorder(continent, avgLifeExp),
         year = factor(year)) %>%
  ggplot(aes(x = avgLifeExp, y = continent, color = year)) +
  # the connecting bar is added before the points so the points are
  # drawn on top of it; `linewidth` replaces the deprecated `size`
  # argument for lines (ggplot2 >= 3.4.0)
  geom_line(aes(group = continent), linewidth = 2, color = "black") +
  geom_point(aes(color = year), size = 4)
```

## Some Bar Charts

Now create a bar chart for the average life expectancy values for the
different continents in 2007. Again show the continent values in
increasing order.

```{r avgLE-bar, exercise = TRUE}

```

```{r avgLE-bar-solution}
filter(avgLE, year == 2007) %>%
  mutate(continent = reorder(continent, avgLifeExp)) %>%
  ggplot(aes(x = avgLifeExp, y = continent)) +
  geom_col()
```

Use a side-by-side bar chart to show the life expectancy values for
1952 and 2007.

```{r avgLE-dodge, exercise = TRUE}

```

```{r avgLE-dodge-solution}
# position = "dodge" places the bars for the two years next to each
# other instead of stacking them
filter(avgLE, year == 2007 | year == 1952) %>%
  mutate(continent = reorder(continent, avgLifeExp),
         year = factor(year)) %>%
  ggplot(aes(x = avgLifeExp, y = continent, fill = year)) +
  geom_col(position = "dodge")
```

Another option for showing the two years is to use faceting on year.
```{r avgLE-bar-facet, exercise = TRUE}

```

```{r avgLE-bar-facet-solution}
# one panel per year, stacked vertically (ncol = 1)
filter(avgLE, year == 2007 | year == 1952) %>%
  mutate(continent = reorder(continent, avgLifeExp),
         year = factor(year)) %>%
  ggplot(aes(x = avgLifeExp, y = continent)) +
  geom_col() +
  facet_wrap(~ year, ncol = 1)
```

## Exercises

### Exercise 1

A plot similar to this was featured in a CNN news story several years ago:

```{r, echo = FALSE}
library(ggplot2)
levs <- c("Democrats", "Republicans", "Independents")
d <- data.frame(party = factor(levs, levs), pct = c(62, 54, 54))
# The bars show pct - 50 and the axis breaks 0..14 are relabeled as
# 50..64, so the bars start from a baseline of 50 rather than 0.
ggplot(d, aes(x = party, y = pct - 50)) +
  geom_col(width = 0.5) +
  scale_y_continuous(labels = seq(50, 64, by = 2),
                     breaks = seq(0, 14, by = 2),
                     expand = expansion(c(0, 0.18))) +
  labs(x = "Political Party", y = NULL,
       title = "Percent Who Agreed With Court") +
  theme(text = element_text(size = 20, face = "bold"),
        panel.grid.minor.x = element_blank(),
        panel.grid.major.x = element_blank(),
        panel.grid.minor.y = element_blank(),
        panel.grid.major.y = element_line(color = "black"),
        panel.background = element_rect(fill = "grey", color = NA),
        # `linewidth` replaces the deprecated `size` argument for the
        # border of element_rect() (ggplot2 >= 3.4.0)
        plot.background = element_rect(fill = "white", color = "black",
                                       linewidth = 2),
        plot.margin = margin(20, 20, 20, 20)) +
  coord_fixed(0.1)
```

```{r court-question, echo = FALSE}
question(
  "Which of the following is approximately correct:",
  answer("About the same number of democrats as republicans agreed with the court."),
  answer("About 15% more democrats than republicans agreed with the court.",
         correct = TRUE),
  answer("About three times as many democrats as republicans agreed with the court."),
  answer("About two times as many democrats as republicans agreed with the court."),
  random_answer_order = TRUE,
  allow_retry = TRUE
)
```

### Exercise 2

Consider the stacked bar chart produced by the following code:

```{r four-cyl-exercise, exercise = TRUE}
library(tidyverse)
mpg2 <- mutate(mpg, class = fct_rev(fct_infreq(class)), cyl = factor(cyl))
p <- ggplot(mpg2, aes(y = class, fill = cyl)) +
  geom_bar()
```

```{r four-cyl-question, echo = FALSE}
question(
  "Which of these modifications makes it easiest to compare the count of 4-cylinder models within the different classes?",
  answer("`p %+% mutate(mpg2, cyl = factor(cyl, c(4, 5, 6, 8)))`"),
  answer("`p %+% mutate(mpg2, cyl = factor(cyl, c(5, 6, 4, 8)))`"),
  answer("`p %+% mutate(mpg2, cyl = factor(cyl, c(4, 6, 8, 5)))`"),
  answer("`p %+% mutate(mpg2, cyl = factor(cyl, c(8, 6, 5, 4)))`",
         correct = TRUE),
  random_answer_order = TRUE,
  allow_retry = TRUE
)
```

### Exercise 3

The bar chart produced by the following code has `x` axis labels that
could be improved:

```{r not-better-x-exercise, exercise = TRUE}
library(gapminder)
library(dplyr)
library(ggplot2)
library(scales)
p <- filter(gapminder, year == 2007) %>%
  group_by(continent) %>%
  summarize(avgGdpPercap = mean(gdpPercap)) %>%
  ggplot(aes(x = avgGdpPercap, y = continent)) +
  geom_col() +
  labs(x = "Average GDP Per Capita", y = NULL) +
  theme_minimal() +
  theme(text = element_text(size = 16))
```

There are a number of different options.

```{r, not-better-x-question, echo = FALSE}
question(
  "Which of the following does **not** provide improved `x` axis labels?",
  answer("`p + scale_x_continuous(labels = label_comma())`"),
  answer("`p + scale_x_continuous(labels = label_dollar())`"),
  answer("`p + scale_x_continuous(labels = unit_format(scale = 1/1000, unit = \"K\", prefix = \"$\"))`"),
  answer("`p + scale_x_continuous(labels = c(\"$10,000\", \"$20,000\", \"$30,000\"))`",
         correct = TRUE),
  random_answer_order = TRUE,
  allow_retry = TRUE
)
```

### Exercise 4

A stacked bar chart is appropriate if the combined bar heights of the
stacked bars have a reasonable interpretation.
Consider the following two plots:

```{r, fig.height = 4, fig.width = 8}
library(gapminder)
library(dplyr)
library(ggplot2)
library(patchwork)

# Layers common to both panels, added in the same order for each plot.
stacked_layers <- list(
  geom_col(),
  theme_minimal(),
  theme(text = element_text(size = 12)),
  scale_x_continuous(expand = expansion(mult = c(0, 0.1)))
)

# P1: average life expectancy per continent for the years from 2000 on,
# with the years stacked within each continent's bar.
recent_avg <- gapminder %>%
  filter(year >= 2000) %>%
  group_by(continent, year) %>%
  summarize(avgLifeExp = mean(lifeExp), .groups = "drop")

p1 <- ggplot(recent_avg,
             aes(x = avgLifeExp, y = continent, fill = factor(year))) +
  stacked_layers +
  labs(x = "Average Life Expectancy",
       y = NULL,
       fill = "Year",
       title = "Average Life Expectancy\nby Continent for Two Years",
       tag = "P1:")

# P2: number of car models per class, with the cylinder counts stacked
# within each class's bar.
model_counts <- count(mpg, class, cyl)

p2 <- ggplot(model_counts,
             aes(x = n, y = class, fill = factor(cyl))) +
  stacked_layers +
  labs(x = "Number of Models",
       y = NULL,
       fill = "Cylinders",
       title = "Number of Car Models\nby Class and Cylinder Count",
       tag = "P2:")

# patchwork places the two plots side by side
p1 + p2
```

```{r stacked-bar-question, echo = FALSE}
question(
  "Which of the following statements is true:",
  answer("P1 is an appropriate use of a stacked bar chart but P2 is not."),
  answer("P2 is an appropriate use of a stacked bar chart but P1 is not.",
         correct = TRUE),
  answer("Neither P1 nor P2 is an appropriate use of a stacked bar chart."),
  answer("Both P1 and P2 are appropriate uses of a stacked bar chart."),
  random_answer_order = TRUE,
  allow_retry = TRUE
)
```