Motivation
Workflow
Setup
#install.packages("ggrepel")
#install.packages("moderndive")
suppressWarnings(suppressMessages({
library(gapminder)
library(tidyverse)
library(plotly)
library(ggrepel)
library(ExPanDaR)
library(moderndive)
library(skimr)
}))
# Centre every figure rendered through knitr.
knitr::opts_chunk$set(fig.align = "center")
# Session-wide options, set in a single call:
#   warn = -1        silences all warnings for the rest of the session
#   scipen = 10000   biases number printing towards fixed notation
#   repr.plot.*      plot width and height options (6 and 4)
options(
  warn = -1,
  scipen = 10000,
  repr.plot.width = 6,
  repr.plot.height = 4
)
Famous example
# Animated bubble chart: GDP per capita (log scale) against life expectancy,
# coloured by continent, with point size mapped to population.
ggplotly(
  ggplot(
    data = gapminder,
    mapping = aes(x = gdpPercap, y = lifeExp, color = continent)
  ) +
    # `frame` and `ids` are picked up by ggplotly to animate the points by year
    # and to track each country between frames.
    geom_point(mapping = aes(size = pop, frame = year, ids = country)) +
    labs(x = "GDP per capita", y = "Life Expectancy") +
    scale_x_log10() +
    guides(color = guide_legend(title = "Continent"))
)
Import data
# The data set comes from the gapminder package attached during setup; the CSV
# import below is left commented out.
# gapminder <- read.csv("gapminder.csv")
# Display the data set.
gapminder
Glimpse and skim data
# Column names, types and a preview of the first values.
glimpse(gapminder)
# Per-column summaries.
summary(gapminder)
# Skim life expectancy and GDP per capita for the Americas in 2007 and
# return the skim result as a tibble.
gapminder %>%
  filter(year == 2007 & continent == "Americas") %>%
  select(lifeExp, gdpPercap) %>%
  skim() %>%
  as_tibble()
Transform data
Filter
# Rows for the year 2007 only.
filter(gapminder, year == 2007)
# Every row for the United States.
filter(gapminder, country == "United States")
# Both conditions at once: the United States in 2007.
filter(gapminder, year == 2007 & country == "United States")
Select
# Keep only the country and GDP-per-capita columns.
select(gapminder, country, gdpPercap)
# `year` is selected as well so that the filter that follows can use it.
gapminder %>%
  select(country, gdpPercap, year) %>%
  filter(year == 2007)
Arrange
# Sort by GDP per capita, ascending.
arrange(gapminder, gdpPercap)
# Sort by GDP per capita, descending.
arrange(gapminder, desc(gdpPercap))
# 2007 only, highest GDP per capita first.
gapminder %>%
  filter(year == 2007) %>%
  arrange(desc(gdpPercap))
Mutate
# Replace `pop` with population divided by 1000000 (result is not stored).
mutate(gapminder, pop = pop / 1000000)
# Add a `gdp` column: GDP per capita times population.
mutate(gapminder, gdp = gdpPercap * pop)
# Same new column, restricted to 2007 and sorted from largest to smallest gdp.
gapminder %>%
  mutate(gdp = gdpPercap * pop) %>%
  filter(year == 2007) %>%
  arrange(desc(gdp))
Summarize
# Mean life expectancy over every row of the data set.
gapminder %>%
  summarize(meanLifeExp = mean(lifeExp))
# Mean life expectancy for 2007 only.
gapminder %>%
  filter(year == 2007) %>%
  summarize(meanLifeExp = mean(lifeExp))
# Two summaries at once for 2007.
# `pop` is stored as an integer column in the gapminder package data, and the
# sum of integers is NA (with a warning) once it exceeds the 32-bit integer
# range. Converting to double first keeps the world total representable.
gapminder %>%
  filter(year == 2007) %>%
  summarize(
    meanLifeExp = mean(lifeExp),
    totalPop = sum(as.numeric(pop))
  )
Group_by
# In every summary below `pop` is converted to double before summing: it is an
# integer column in the gapminder package data, and an integer sum that exceeds
# the 32-bit integer range comes back as NA (with a warning).

# One row per year: mean life expectancy and total population.
gapminder %>%
  group_by(year) %>%
  summarize(
    meanLifeExp = mean(lifeExp),
    totalPop = sum(as.numeric(pop))
  )
# One row per continent, for 2007 only.
gapminder %>%
  filter(year == 2007) %>%
  group_by(continent) %>%
  summarize(
    meanLifeExp = mean(lifeExp),
    totalPop = sum(as.numeric(pop))
  )
# One row per year/continent combination.
gapminder %>%
  group_by(year, continent) %>%
  summarize(
    totalPop = sum(as.numeric(pop)),
    meanLifeExp = mean(lifeExp)
  )
Visualize data
Scatter plot
# Keep the 2007 rows in their own table and display it.
gapminder_2007 <- filter(gapminder, year == 2007)
gapminder_2007
# Static scatter plot for 2007 with every point labelled by country;
# geom_text_repel() comes from ggrepel.
ggplot(
  data = gapminder_2007,
  mapping = aes(x = gdpPercap, y = lifeExp)
) +
  geom_point() +
  geom_text_repel(mapping = aes(label = country)) +
  labs(x = "GDP per capita", y = "Life Expectancy")
# Interactive 2007 scatter plot with one linear trend line per continent.
ggplotly(
  ggplot(
    gapminder_2007,
    aes(x = gdpPercap, y = lifeExp, color = continent)
  ) +
    # `size = pop` is mapped on the point layer only. Mapped globally it would
    # also be inherited by geom_smooth(), which cannot use a per-row size and
    # drops it. `label` is included so the country appears in the tooltip.
    geom_point(aes(size = pop, label = country)) +
    geom_smooth(method = "lm", se = FALSE) +
    scale_x_log10() +
    labs(x = "GDP per capita", y = "Life Expectancy")
)
# Animated version over the full data set: `frame = year` gives one animation
# frame per year and `ids = country` tracks each country between frames.
ggplotly(
  ggplot(
    data = gapminder,
    mapping = aes(
      x = gdpPercap,
      y = lifeExp,
      color = continent,
      size = pop
    )
  ) +
    geom_point(mapping = aes(frame = year, ids = country)) +
    labs(x = "GDP per capita", y = "Life Expectancy") +
    scale_x_log10()
)
Facet plots
# 2007 scatter plot split into one panel per continent, each with its own
# linear trend line.
ggplotly(
  ggplot(
    data = gapminder_2007,
    mapping = aes(x = gdpPercap, y = lifeExp, color = continent)
  ) +
    geom_point(mapping = aes(label = country)) +
    geom_smooth(method = "lm", se = FALSE) +
    facet_wrap(~ continent) +
    scale_x_log10() +
    labs(x = "GDP per capita", y = "Life Expectancy")
)
# Animated, faceted scatter plot over all years.
ggplotly(
  ggplot(
    data = gapminder,
    mapping = aes(x = gdpPercap, y = lifeExp, color = continent)
  ) +
    geom_point(mapping = aes(frame = year, ids = country)) +
    facet_wrap(~ continent) +
    scale_x_log10()
)
Box plot
# Horizontal box plots of 2007 life expectancy, one box per continent.
ggplotly(
  ggplot(
    data = gapminder_2007,
    mapping = aes(x = continent, y = lifeExp, color = continent)
  ) +
    geom_boxplot() +
    coord_flip() +
    labs(x = "", y = "Life Expectancy")
)
# Animated version over all years.
# NOTE(review): `ids = country` is a per-country mapping on a per-continent
# box layer; confirm it is needed here and does not affect how boxes are grouped.
ggplotly(
  ggplot(
    data = gapminder,
    mapping = aes(x = continent, y = lifeExp, color = continent)
  ) +
    geom_boxplot(mapping = aes(frame = year, ids = country)) +
    coord_flip() +
    labs(x = "", y = "Life Expectancy")
)
Group_by and plot
# Yearly totals and means.
# `pop` is an integer column in the gapminder package data; an integer sum that
# exceeds the 32-bit integer range comes back as NA (with a warning), so the
# column is converted to double before summing.
by_year <- gapminder %>%
  group_by(year) %>%
  summarize(
    totalPop = sum(as.numeric(pop)),
    meanLifeExp = mean(lifeExp)
  )
by_year
# Same summaries per year and continent. The result is ungrouped so later
# steps that reuse this table do not inherit the leftover `year` grouping.
by_year_continent <- gapminder %>%
  group_by(year, continent) %>%
  summarize(
    totalPop = sum(as.numeric(pop)),
    meanLifeExp = mean(lifeExp)
  ) %>%
  ungroup()
by_year_continent
# Total population per continent over time; the y axis is stretched to
# include zero.
ggplotly(
  ggplot(
    data = by_year_continent,
    mapping = aes(x = year, y = totalPop, color = continent)
  ) +
    geom_point() +
    expand_limits(y = 0)
)
Line plot
# Mean life expectancy over time, one line per continent.
ggplotly(
  ggplot(
    data = by_year_continent,
    mapping = aes(x = year, y = meanLifeExp, color = continent)
  ) +
    geom_line()
)
Bar plot
# Mean 2007 life expectancy per continent.
by_continent <- gapminder %>%
  filter(year == 2007) %>%
  group_by(continent) %>%
  summarize(meanLifeExp = mean(lifeExp))
by_continent
# One bar per continent, bar height taken directly from the summarised value.
ggplotly(
  ggplot(
    data = by_continent,
    mapping = aes(x = continent, y = meanLifeExp)
  ) +
    geom_col()
)
Histogram
# Histogram of 2007 life expectancy with the default binning.
ggplotly(
  ggplot(data = gapminder_2007, mapping = aes(x = lifeExp)) +
    geom_histogram()
)
# Same histogram with an explicit bin width of 5.
ggplotly(
  ggplot(data = gapminder_2007, mapping = aes(x = lifeExp)) +
    geom_histogram(binwidth = 5)
)