---
#############################################################
#                                                           #
# In RStudio click on "Run Document" to run this tutorial   #
#                                                           #
#############################################################
title: "Data and Data Frames"
author: "Luke Tierney"
output: learnr::tutorial
runtime: shiny_prerendered
---

```{r setup, include = FALSE}
# Packages used by the question and exercise chunks below.
library(learnr)
library(tidyverse)

# Chunk defaults: do not echo source, print output without a comment
# prefix, and keep warnings out of the rendered document.
knitr::opts_chunk$set(echo = FALSE, comment = "", warning = FALSE)
```

```{r stop_when_browser_closes, context = "server"}
# stop the app when the browser is closed (or, unfortunately, refreshed)
session$onSessionEnded(stopApp)
```

## Exercises

### Exercise 1

Which of the Stevens classifications (nominal, ordinal, interval,
ratio) best characterizes these variables:

```{r stevens_classes_temp, echo = FALSE}
# Single-choice question; the learner may retry after a wrong answer.
question("Daily maximal temperatures in Iowa City.",
    answer("nominal"),
    answer("ordinal"),
    answer("interval", correct = TRUE),
    answer("ratio"),
    allow_retry = TRUE
)
```

```{r stevens_classes_pop, echo = FALSE}
# Single-choice question; the learner may retry after a wrong answer.
question("Population counts for Iowa counties.",
    answer("nominal"),
    answer("ordinal"),
    answer("interval"),
    answer("ratio", correct = TRUE),
    allow_retry = TRUE
)
```

```{r stevens_classes_edlev, echo = FALSE}
# Single-choice question; the question text links to the BLS classification.
question("Education level of job applicants using the [Bureau of Labor Statistics classification](https://www.bls.gov/careeroutlook/2014/article/education-level-and-jobs.htm).",
    answer("nominal"),
    answer("ordinal", correct = TRUE),
    answer("interval"),
    answer("ratio"),
    allow_retry = TRUE
)
```

```{r stevens_classes_major, echo = FALSE}
# Single-choice question; the learner may retry after a wrong answer.
question("Major of UI students.",
    answer("nominal", correct = TRUE),
    answer("ordinal"),
    answer("interval"),
    answer("ratio"),
    allow_retry = TRUE
)
```

### Exercise 2

```{r tidy_or_not, echo = FALSE}
# Two answers are marked correct (`BOD` and `ggplot2::mpg`).
question("Which of these data sets are in tidy form?",
    answer("The builtin data set `co2`"),
    answer("The builtin data set `BOD`", correct = TRUE),
    answer("The `who` data set in package `tidyr` (`tidyr::who`)"),
    answer("The `mpg` data set in package `ggplot2` (`ggplot2::mpg`)",
           correct = TRUE),
    allow_retry = TRUE
)
```

```{r tidy_or_not_workspace, exercise = TRUE}

```

### Exercise 3

The next exercises use the data in the variable `gapminder` in the
package `gapminder`. You can make it available with

```{r, echo = TRUE}
data(gapminder, package = "gapminder")
```

Use the function `str` to examine the value of the gapminder variable.

```{r gapminder-str, exercise = TRUE}

```

```{r gapminder-str-hint}
str(gapminder)
```

```{r gapminder_cases_question, echo = FALSE}
# The accepted answer is computed from the data rather than hard-coded.
question_text("How many cases are there in the data set?",
    answer(as.character(nrow(gapminder)), correct = TRUE),
    allow_retry = TRUE
)
```

```{r gapminder_fct_question, echo = FALSE}
# Count the factor columns. vapply() pins the per-column result to a single
# logical, so the sum is well-defined regardless of the data frame's shape.
n_factor_vars <- sum(vapply(gapminder, is.factor, logical(1)))

question_text("How many of the variables are factors?",
    answer(as.character(n_factor_vars), correct = TRUE),
    allow_retry = TRUE
)
```

### Exercise 4

Use the functions `class` and `names` to find the class and variable
names in the `gapminder` data.

```{r gapminder_class_names, exercise = TRUE}

```

### Exercise 5

Use `summary` to compute summary information for the variables.

```{r gapminder_summary, exercise = TRUE}

```

### Exercise 6

Fill in the values for `---` needed to produce plots of life
expectancy against year for the countries in continent Oceania.

```{r gapminder_plot, exercise = TRUE, eval = FALSE}
library(dplyr)
library(ggplot2)
data(gapminder, package = "gapminder")
ggplot(filter(gapminder, continent == "Oceania"),
       aes(x = ---, y = ---, color = country)) +
    geom_line()
```