Motivation
Workflow
Setup
#install.packages("ggrepel")
#install.packages("moderndive")
suppressWarnings(suppressMessages({
library(gapminder)
library(tidyverse)
library(plotly)
library(ggrepel)
library(ExPanDaR)
library(moderndive)
library(skimr)
}))
# Notebook-wide display settings.
knitr::opts_chunk$set(fig.align = "center")
options(
  warn = -1,              # negative value: all warnings are ignored
  scipen = 10000,         # large penalty: print numbers in fixed notation
  repr.plot.width = 6,    # repr.* plot size (presumably for Jupyter output)
  repr.plot.height = 4
)
Famous example
# Interactive bubble chart: GDP per capita (log scale) against life
# expectancy, bubble size by population, colour by continent. The `frame`
# and `ids` mappings are passed through for ggplotly() to use.
ggplotly(
  ggplot(gapminder, aes(x = gdpPercap, y = lifeExp, color = continent)) +
    geom_point(aes(size = pop, frame = year, ids = country)) +
    scale_x_log10() +
    labs(x = "GDP per capita", y = "Life Expectancy") +
    guides(color = guide_legend(title = "Continent"))
)
Import data
# Alternative data source (disabled): read the data from a local CSV file
# instead of using the `gapminder` object that is already in scope.
# gapminder <- read.csv("gapminder.csv")
# Display the data set.
gapminder
Glimpse and skim data
# Compact overview of every column: name, type and first values.
glimpse(gapminder)

# Base R per-column summary.
summary(gapminder)

# Skim life expectancy and GDP per capita for the Americas in 2007 and
# convert the skim result to a tibble.
gapminder %>%
  filter(year == 2007 & continent == "Americas") %>%
  select(lifeExp, gdpPercap) %>%
  skim() %>%
  as_tibble()
Transform data
Filter
# Rows observed in 2007.
gapminder %>%
  filter(year == 2007)

# Rows for the United States, all years.
gapminder %>%
  filter(country == "United States")

# Both conditions at once: the United States in 2007.
gapminder %>%
  filter(year == 2007 & country == "United States")
Select
# Keep two columns only.
gapminder %>%
  select(country, gdpPercap)

# Restrict to 2007, then keep three columns (row filtering and column
# selection commute here because `year` is among the selected columns).
gapminder %>%
  filter(year == 2007) %>%
  select(country, gdpPercap, year)
Arrange
# Sort by GDP per capita, ascending.
gapminder %>%
  arrange(gdpPercap)

# Sort by GDP per capita, descending.
gapminder %>%
  arrange(desc(gdpPercap))

# 2007 only, richest countries first.
gapminder %>%
  filter(year == 2007) %>%
  arrange(desc(gdpPercap))
Mutate
# Overwrite `pop` with the population expressed in millions.
gapminder %>%
  mutate(pop = pop / 1e6)

# Add total GDP as a new column.
gapminder %>%
  mutate(gdp = gdpPercap * pop)

# Total GDP for 2007 only, largest economies first.
gapminder %>%
  filter(year == 2007) %>%
  mutate(gdp = gdpPercap * pop) %>%
  arrange(desc(gdp))
Summarize
# Mean life expectancy over every row of the data set.
gapminder %>%
  summarize(meanLifeExp = mean(lifeExp))

# Mean life expectancy in 2007.
gapminder %>%
  filter(year == 2007) %>%
  summarize(meanLifeExp = mean(lifeExp))

# Mean life expectancy and total population in 2007.
# `pop` is converted to double before summing: when `pop` is an integer
# column (as in the gapminder package data), a total above
# .Machine$integer.max makes sum() return NA with an "integer overflow"
# warning, and warnings are silenced in this notebook.
gapminder %>%
  filter(year == 2007) %>%
  summarize(meanLifeExp = mean(lifeExp),
            totalPop = sum(as.numeric(pop)))
Group_by
# In every summary below `pop` is converted to double before summing: when
# `pop` is an integer column (as in the gapminder package data), a total
# above .Machine$integer.max makes sum() return NA with an "integer
# overflow" warning, and warnings are silenced in this notebook.

# One row per year.
gapminder %>%
  group_by(year) %>%
  summarize(meanLifeExp = mean(lifeExp),
            totalPop = sum(as.numeric(pop)))

# One row per continent, 2007 only.
gapminder %>%
  filter(year == 2007) %>%
  group_by(continent) %>%
  summarize(meanLifeExp = mean(lifeExp),
            totalPop = sum(as.numeric(pop)))

# One row per year/continent pair. summarize() peels off only the last
# grouping variable, so ungroup() is added to return a plain tibble.
gapminder %>%
  group_by(year, continent) %>%
  summarize(totalPop = sum(as.numeric(pop)),
            meanLifeExp = mean(lifeExp)) %>%
  ungroup()
Visualize data
Scatter plot
# Subset used by the 2007 plots.
gapminder_2007 <- filter(gapminder, year == 2007)
gapminder_2007

# Static scatter plot with a repelled text label for every country.
ggplot(gapminder_2007, aes(x = gdpPercap, y = lifeExp)) +
  geom_point() +
  geom_text_repel(aes(label = country)) +
  labs(x = "GDP per capita", y = "Life Expectancy")
# Interactive 2007 scatter plot with one linear trend line per continent.
# `size = pop` is mapped on the point layer only: as a plot-level mapping it
# would also be inherited by geom_smooth(), where a per-row size has no
# meaning for the fitted line. `label = country` is passed through on the
# point layer for ggplotly() to use.
ggplotly(
  gapminder_2007 %>%
    ggplot(aes(x = gdpPercap, y = lifeExp, color = continent)) +
    geom_point(aes(size = pop, label = country)) +
    geom_smooth(method = "lm", se = FALSE) +
    scale_x_log10() +
    labs(x = "GDP per capita", y = "Life Expectancy")
)
# Same scatter plot over the full data set; `frame = year` and
# `ids = country` are passed through for ggplotly() to use.
(
  ggplot(
    gapminder,
    aes(x = gdpPercap, y = lifeExp, color = continent, size = pop)
  ) +
    geom_point(aes(frame = year, ids = country)) +
    labs(x = "GDP per capita", y = "Life Expectancy") +
    scale_x_log10()
) %>%
  ggplotly()
Facet plots
# 2007 scatter plot, one panel per continent, each with a linear trend line.
ggplotly(
  ggplot(gapminder_2007, aes(x = gdpPercap, y = lifeExp, color = continent)) +
    geom_point(aes(label = country)) +
    geom_smooth(method = "lm", se = FALSE) +
    facet_wrap(~ continent) +
    scale_x_log10() +
    labs(x = "GDP per capita", y = "Life Expectancy")
)

# Full data set, one panel per continent, with `frame`/`ids` passed through
# for ggplotly() to use.
ggplotly(
  ggplot(gapminder, aes(x = gdpPercap, y = lifeExp, color = continent)) +
    geom_point(aes(frame = year, ids = country)) +
    facet_wrap(~ continent) +
    scale_x_log10()
)
Box plot
# Horizontal box plots of 2007 life expectancy, one box per continent.
ggplotly(
  ggplot(gapminder_2007, aes(x = continent, y = lifeExp, color = continent)) +
    geom_boxplot() +
    coord_flip() +
    labs(x = "", y = "Life Expectancy")
)

# Same box plots over the full data set with `frame = year`.
# NOTE(review): `ids = country` is a discrete per-row mapping on a layer that
# aggregates rows; confirm it does not split each continent's box into
# per-country groups in the rendered plot.
ggplotly(
  ggplot(gapminder, aes(x = continent, y = lifeExp, color = continent)) +
    geom_boxplot(aes(frame = year, ids = country)) +
    coord_flip() +
    labs(x = "", y = "Life Expectancy")
)
Group_by and plot
# In both summaries `pop` is converted to double before summing: when `pop`
# is an integer column (as in the gapminder package data), a total above
# .Machine$integer.max makes sum() return NA with an "integer overflow"
# warning, and warnings are silenced in this notebook, so the NA totals
# would go unnoticed and the affected points would be missing from the plot.

# Total population and mean life expectancy per year.
by_year <- gapminder %>%
  group_by(year) %>%
  summarize(totalPop = sum(as.numeric(pop)),
            meanLifeExp = mean(lifeExp))
by_year

# Same summary per year and continent. summarize() peels off only the last
# grouping variable, so ungroup() is added to store a plain tibble.
by_year_continent <- gapminder %>%
  group_by(year, continent) %>%
  summarize(totalPop = sum(as.numeric(pop)),
            meanLifeExp = mean(lifeExp)) %>%
  ungroup()
by_year_continent

# Total population over time by continent; the y axis is extended to 0.
ggplotly(
  ggplot(by_year_continent, aes(x = year, y = totalPop, color = continent)) +
    geom_point() +
    expand_limits(y = 0)
)
Line plot
# Mean life expectancy over time, one line per continent.
(
  ggplot(
    by_year_continent,
    aes(x = year, y = meanLifeExp, color = continent)
  ) +
    geom_line()
) %>%
  ggplotly()
Bar plot
# Mean life expectancy per continent in 2007.
by_continent <- gapminder %>%
  filter(year == 2007) %>%
  group_by(continent) %>%
  summarize(meanLifeExp = mean(lifeExp))
by_continent

# One bar per continent, bar height taken from `meanLifeExp`.
(
  ggplot(by_continent, aes(x = continent, y = meanLifeExp)) +
    geom_col()
) %>%
  ggplotly()
Histogram
# Histogram of 2007 life expectancy with the default binning.
(
  ggplot(gapminder_2007, aes(x = lifeExp)) +
    geom_histogram()
) %>%
  ggplotly()

# Same histogram with bins that are 5 units wide.
(
  ggplot(gapminder_2007, aes(x = lifeExp)) +
    geom_histogram(binwidth = 5)
) %>%
  ggplotly()