# Load the data: read the cacao flavors CSV from the working directory.
flavors_cacao <- read_csv(file = "flavors_of_cacao.csv")
Rows: 1795 Columns: 9
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (6): Company
(Maker-if known), Specific Bean Origin
or Bar Name, Cocoa
...
dbl (3): REF, Review
Date, Rating
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Preview the data frame: first rows, column names, and a compact
# per-column overview of types and leading values.
flavors_cacao %>% head()
names(flavors_cacao)
glimpse(flavors_cacao)
Rows: 1,795
Columns: 9
$ `Company \n(Maker-if known)` <chr> "A. Morin", "A. Morin", "A. Morin"…
$ `Specific Bean Origin\nor Bar Name` <chr> "Agua Grande", "Kpime", "Atsane", …
$ REF <dbl> 1876, 1676, 1676, 1680, 1704, 1315…
$ `Review\nDate` <dbl> 2016, 2015, 2015, 2015, 2015, 2014…
$ `Cocoa\nPercent` <chr> "63%", "70%", "70%", "70%", "70%",…
$ `Company\nLocation` <chr> "France", "France", "France", "Fra…
$ Rating <dbl> 3.75, 2.75, 3.00, 3.50, 3.50, 2.75…
$ `Bean\nType` <chr> " ", " ", " ", " ", " ", "Criollo"…
$ `Broad Bean\nOrigin` <chr> "Sao Tome", "Togo", "Togo", "Togo"…
# Cleaning and transformation
# The CSV headers contain embedded newlines, so the affected columns are
# renamed by position to short snake_case names; the remaining names
# (REF, Rating) are lower-cased. cocoa_percent arrives as text such as "63%"
# and is converted to a number.
flavors_cacao <- flavors_cacao %>%
  rename(
    company_maker = 1,
    bean_origin_or_bar_name = 2,
    review_date = 4,
    cocoa_percent = 5,
    company_location = 6,
    bean_type = 8,
    broad_bean_origin = 9
  ) %>%
  rename_with(tolower) %>%
  # parse_number() keeps the numeric part and ignores the trailing "%",
  # yielding a double column.
  mutate(cocoa_percent = parse_number(cocoa_percent))
# Subset selection: keep only rating, cocoa percentage and company location.
flavors_df <- select(flavors_cacao, rating, cocoa_percent, company_location)
flavors_df %>% head()
# Understand the ratings: overall mean first, then the mean per company
# location sorted from highest to lowest.
summarize(flavors_df, mean_rating = mean(rating))

flavors_df %>%
  group_by(company_location) %>%
  summarize(mean_rating = mean(rating)) %>%
  arrange(desc(mean_rating))
# Best bars by quality and cocoa percentage: rating of at least 3.75 and
# cocoa_percent of at least 80, ordered by rating and then by cocoa
# percentage (both descending).
best_flavor_df <- flavors_df %>%
  filter(rating >= 3.75, cocoa_percent >= 80) %>%
  arrange(desc(rating), desc(cocoa_percent))

best_flavor_df
# Plots
# Count of rows in best_flavor_df per company location.
ggplot(data = best_flavor_df) +
  geom_bar(mapping = aes(x = company_location), fill = "#FE9882")

# Same counts, with each bar split by rating. rating is numeric, and a
# continuous fill does not divide the bars into groups, so it is converted
# to a factor to get one colored segment per rating value.
ggplot(data = best_flavor_df) +
  geom_bar(mapping = aes(x = company_location, fill = factor(rating))) +
  labs(fill = "rating")
# Scatter plot of rating against cocoa percentage for every row of flavors_df.
# NOTE(review): the title says "Recommended" but the data plotted is the full
# subset rather than best_flavor_df -- confirm which was intended.
ggplot(data = flavors_df) +
  geom_point(mapping = aes(x = cocoa_percent, y = rating)) +
  labs(title = "Recommended Bars")
# Density of ratings, one panel per review_date value, each panel filled
# with its own color.
ggplot(flavors_cacao, aes(x = rating, fill = as.factor(review_date))) +
  geom_density(alpha = 0.5) +
  theme_minimal() +
  facet_wrap(~ as.factor(review_date)) +
  # The fill legend would only repeat the panel titles, so it is hidden.
  # "none" is the supported spelling; passing FALSE is deprecated.
  guides(fill = "none") +
  labs(x = "Rating", y = "Density")
Warning message:
“`guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.”
# Rating distribution per company location, restricted to locations with
# more than 10 rows. Boxes are ordered and colored by each location's mean
# rating, and the rating axis is extended to cover 0 through 5.
flavors_cacao %>%
  group_by(company_location) %>%
  filter(n() > 10) %>%
  mutate(avg = mean(rating)) %>%
  ggplot(aes(x = reorder(company_location, avg), y = rating, fill = avg)) +
  geom_boxplot() +
  scale_fill_continuous(
    low = "#ffffcc",
    high = "#fc4e2a",
    name = "Average rating"
  ) +
  expand_limits(y = c(0, 5)) +
  labs(x = "Company Location", y = "Rating") +
  theme_minimal() +
  coord_flip()