# libraries
import numpy as np
import pandas as pd
import altair as alt
import random
# Read the cosmetics dataset from its CSV file into a DataFrame, then
# preview the first five rows (the preview is only rendered when this is
# the last expression of a notebook cell).
cosmetic_data = pd.read_csv(filepath_or_buffer='/work/cscpopendata.csv')
cosmetic_data.head(n=5)
# Subsample the data and trim unused columns.
# The source table needs no reshaping; we only draw a reproducible random
# subset of rows and drop the ID, CAS-number and date columns.
# NOTE(review): 5000 presumably matches Altair's default row limit for
# embedded data -- confirm before raising it.
np.random.seed(123)  # sample() draws from NumPy's global RNG when no random_state is passed
cosmetic_data_sample = cosmetic_data.sample(
    # Clamp the sample size so a dataset with fewer than 5000 rows is used
    # in full instead of raising ValueError.
    n=min(5000, len(cosmetic_data))
).drop(
    columns=[
        'CSFId',
        'CompanyId',
        'PrimaryCategoryId',
        'SubCategoryId',
        'CasNumber',
        'ChemicalId',
        'MostRecentDateReported',
        'ChemicalUpdatedAt',
    ]
)
cosmetic_data_sample.head()
# Bar chart: number of sampled records for each reported chemical name.
chemicalbar = (
    alt.Chart(cosmetic_data_sample)
    .mark_bar()
    .encode(
        x=alt.X('ChemicalName', title='Chemical name'),
        # 'count()' aggregates the number of rows falling into each x value.
        y=alt.Y('count()'),
    )
)
# Bar chart: number of sampled records in each primary product category.
category = (
    alt.Chart(cosmetic_data_sample)
    .mark_bar()
    .encode(
        x=alt.X('PrimaryCategory', title='Product category'),
        # 'count()' aggregates the number of rows falling into each x value.
        y=alt.Y('count()'),
    )
)
# Display the two bar charts built from the sampled data.
# NOTE(review): bare expressions only render as the last expression of a
# notebook cell; when run as a plain script these two lines have no visible
# effect -- confirm how this file is executed.
chemicalbar
category