# ! pip install sidetable -U
# ! pip install pandas -U
import sys
import pandas as pd
import sidetable
# Print the Python, pandas and sidetable versions in use, one per line.
print(
    f"Python version {sys.version}",
    f"pandas version: {pd.__version__}",
    f"sidetable version: {sidetable.__version__}",
    sep="\n",
)
# Output:
# Python version 3.7.3 (default, Jun 11 2019, 01:11:15)
# [GCC 6.3.0 20170516]
# pandas version: 1.0.5
# sidetable version: 0.5.0
# Load the penguins sample CSV from the seaborn-data GitHub repository
# (needs network access; the URL tracks the repository's master branch).
df_penguins = pd.read_csv('https://raw.githubusercontent.com/mwaskom/seaborn-data/master/penguins.csv')
# Preview the first two rows.
df_penguins.head(2)
# Print the per-column non-null counts, dtypes and memory usage.
df_penguins.info()
# Output:
# <class 'pandas.core.frame.DataFrame'>
# RangeIndex: 344 entries, 0 to 343
# Data columns (total 7 columns):
#  #   Column             Non-Null Count  Dtype
# ---  ------             --------------  -----
#  0   species            344 non-null    object
#  1   island             344 non-null    object
#  2   culmen_length_mm   342 non-null    float64
#  3   culmen_depth_mm    342 non-null    float64
#  4   flipper_length_mm  342 non-null    float64
#  5   body_mass_g        342 non-null    float64
#  6   sex                333 non-null    object
# dtypes: float64(4), object(3)
# memory usage: 18.9+ KB
# Walkthrough of the `.stb` accessor that `import sidetable` registers on
# DataFrames. Each bare expression below is meant to be displayed as a
# notebook cell result; run as a plain script they produce no output.
#
# --- Missing values ---
# sidetable's missing() table, then the plain-pandas per-column null count
# for comparison.
df_penguins.stb.missing()
df_penguins.isna().sum()
# Same call with style=True.
# NOTE(review): presumably returns a formatted (styled) rendering of the same
# table rather than a plain DataFrame -- confirm against the sidetable docs.
df_penguins.stb.missing(style=True)
# --- Frequency tables ---
# freq() over a single column, first plain and then with style=True.
df_penguins.stb.freq(['species'])
df_penguins.stb.freq(['species'], style=True)
# Inspect the type of object the unstyled freq() call returns.
type(df_penguins.stb.freq(['species']))
# cum_cols=False -- presumably omits the cumulative columns; TODO confirm.
df_penguins.stb.freq(['species'], style=True, cum_cols=False)
# freq() over several columns at once (species / island / sex combinations).
df_penguins.stb.freq(['species', 'island', 'sex'])
# Baseline for `island`, followed by the same table with two `thresh` values.
# NOTE(review): thresh looks like a cumulative-share cutoff beyond which the
# remaining rows are lumped into one bucket -- verify units (fraction vs.
# percent) against the installed sidetable version (0.5.0 here).
df_penguins.stb.freq(['island'], style=True)
df_penguins.stb.freq(['island'], style=True, thresh=.5)
df_penguins.stb.freq(['island'], style=True, thresh=.9)
# other_label -- presumably the display name used for that lumped bucket.
df_penguins.stb.freq(['island'], style=True, thresh=.9, other_label='Other Islands')
# value= names a numeric column; presumably the table then aggregates
# flipper_length_mm per island instead of counting rows -- TODO confirm.
df_penguins.stb.freq(['island'], value='flipper_length_mm')
# Keep the unstyled result in a variable, title-case its column labels with
# the pandas string accessor, then display it.
freq_table = df_penguins.stb.freq(['island'])
freq_table.columns = freq_table.columns.str.title()
freq_table
# --- Summary statistics and subtotals ---
# pandas describe(): default column selection first, then include='all' to
# cover every column regardless of dtype.
df_penguins.describe()
df_penguins.describe(include='all')
# Last rows of the subtotal() result.
# NOTE(review): presumably this is where an appended total row ends up,
# hence tail() -- confirm.
df_penguins.stb.subtotal().tail()
# Count `island` entries per (species, sex) group. groupby skips rows whose
# key is NaN by default, so the 11 rows without `sex` are not counted.
df_penguins.groupby(['species', 'sex']).agg(dict(island='count'))
# Same grouped counts passed through subtotal() (presumably adds subtotal and
# total rows to the MultiIndex result -- confirm).
df_penguins.groupby(['species', 'sex']).agg(dict(island='count')).stb.subtotal()