# library imports
import pandas as pd
import seaborn as sns
import numpy as np
# Load seaborn's "iris" example dataset; the result is bound to the
# module-level name `iris` and reused by the rest of the script.
iris = sns.load_dataset('iris')
# Quick sanity check: look at 10 randomly drawn rows. No random_state is
# given, so a different set of rows is drawn on every run.
iris.sample(n=10)
# Boolean indexing: select a subset of rows by passing a boolean vector
# (one True/False per row) to the indexer.
# E.g., keep only the rows where species is setosa AND sepal_width is
# greater than 2.5.
iris.loc[
    iris['species'].eq('setosa') & iris['sepal_width'].gt(2.5)
]
# pivot_table takes a data frame as input and performs grouped operations
# that provide a multidimensional summarization of the data.
# E.g., group by species and calculate the median of each measurement.
iris.pivot_table(
    index=['species'],
    values=['sepal_width', 'sepal_length', 'petal_width', 'petal_length'],
    # Use the string name rather than the np.median callable: recent pandas
    # versions deprecate passing NumPy reducers here (FutureWarning) and
    # recommend the string alias to keep the current grouped-median behavior.
    aggfunc='median',
)