# Load the data
flavors_cacao <- read_csv(file = "flavors_of_cacao.csv")
# Preview the data frame: first rows, column names, and a compact overview
flavors_cacao %>% head()
names(flavors_cacao)
glimpse(flavors_cacao)
# Clean and transform: rename selected columns by position, lower-case every
# column name, then split cocoa_percent at "%" and keep only the part before
# it (convert = TRUE lets tidyr re-type the remaining text).
flavors_cacao <- flavors_cacao %>%
  rename(
    company_maker = 1,
    bean_origin_or_bar_name = 2,
    review_date = 4,
    cocoa_percent = 5,
    company_location = 6,
    bean_type = 8,
    broad_bean_origin = 9
  ) %>%
  rename_with(tolower) %>%
  separate(
    cocoa_percent,
    into = c("cocoa_percent", NA),
    sep = "%",
    convert = TRUE
  )
# Subset: keep only rating, cocoa percentage, and company location
flavors_df <- select(flavors_cacao, rating, cocoa_percent, company_location)
flavors_df %>% head()
# Understanding the ratings: overall mean, then the mean per company location
# sorted from highest to lowest average
summarise(flavors_df, mean_rating = mean(rating))
flavors_df %>%
  group_by(company_location) %>%
  summarise(mean_rating = mean(rating)) %>%
  arrange(desc(mean_rating))
# Best bars by quality and cocoa content: rating of at least 3.75 and
# cocoa_percent of at least 80, ordered by rating and then cocoa_percent
# (both descending)
best_flavor_df <- flavors_df %>%
  filter(rating >= 3.75, cocoa_percent >= 80) %>%
  arrange(desc(rating), desc(cocoa_percent))
best_flavor_df
# Plots
# Number of rows in best_flavor_df per company location
ggplot(data = best_flavor_df) +
  geom_bar(mapping = aes(x = company_location), fill = "#FE9882")
# Same counts, with each bar split by rating. rating is numeric, and the count
# stat drops a fill mapping that varies inside a bar's group, so it is
# converted to a factor to get one stacked segment per rating value.
ggplot(data = best_flavor_df) +
  geom_bar(mapping = aes(x = company_location, fill = factor(rating))) +
  labs(fill = "rating")
# Scatter plot of cocoa percentage against rating for every row of flavors_df
# (title spelling corrected from "Recomended")
ggplot(data = flavors_df) +
  geom_point(mapping = aes(x = cocoa_percent, y = rating)) +
  labs(title = "Recommended Bars")
# Distribution of ratings, one density panel per review_date value
ggplot(flavors_cacao, aes(x = rating, fill = as.factor(review_date))) +
  geom_density(alpha = 0.5) +
  theme_minimal() +
  facet_wrap(~ as.factor(review_date)) +
  # fill only mirrors the facet variable, so its legend is hidden.
  # guides(fill = FALSE) is deprecated in ggplot2; "none" is the supported form.
  guides(fill = "none") +
  labs(x = "Rating", y = "Density")
# Rating box plots for each company location with more than 10 rows,
# ordered and shaded by that location's mean rating
flavors_cacao %>%
  group_by(company_location) %>%
  filter(n() > 10) %>%
  mutate(avg = mean(rating)) %>%
  ggplot(aes(x = reorder(company_location, avg), y = rating, fill = avg)) +
  geom_boxplot() +
  scale_fill_continuous(
    low = "#ffffcc",
    high = "#fc4e2a",
    name = "Average rating"
  ) +
  # Make sure the rating axis spans at least 0 to 5
  expand_limits(y = c(0, 5)) +
  labs(x = "Company Location", y = "Rating") +
  theme_minimal() +
  # Flip so location names sit on the vertical axis
  coord_flip()