Seaborn
Basic structure
import matplotlib.pyplot as plt
import seaborn as sns
# Minimal example: one bar per label in x, with heights taken from y.
sns.barplot(
    x=["A", "B", "C"],
    y=[1, 3, 2],
)
plt.show()
Chart types in seaborn
import seaborn as sns
import matplotlib.pyplot as plt
# Load seaborn's example "tips" dataset: restaurant tips recorded alongside
# other variables of each bill.
tips = sns.load_dataset("tips")
# Preview the first five rows.
print(tips.head(5))
histogram (.displot())
# With no `kind` given, displot draws a histogram of the chosen column.
sns.displot(
    data=tips,
    x="total_bill",
)
plt.show()
scatter plot (.scatterplot())
# One point per row: bill amount on the x axis, tip on the y axis.
sns.scatterplot(
    data=tips,
    x="total_bill",
    y="tip",
)
plt.show()
lm plot (linear model)
# Scatter of tip against total_bill with a fitted linear model drawn on top.
sns.lmplot(
    data=tips,
    x="total_bill",
    y="tip",
)
plt.show()
Scatter plot with group by
import seaborn as sns
import matplotlib.pyplot as plt
# Load the example dataset under its own name for this section.
tipsdata = sns.load_dataset("tips")
tipsdata.head()
# Print the first rows so the available columns are visible.
print(tipsdata.head())
# Scatter plot of tip against total_bill, with the points colored by day.
sns.scatterplot(
    data=tipsdata,
    x="total_bill",
    y="tip",
    hue="day",
    palette="pastel",
)
plt.show()
heatmap
# See the correlation among the numeric variables.
# numeric_only=True leaves out the categorical columns (sex, smoker, day,
# time); pandas >= 2.0 raises on them instead of silently dropping them.
tips.corr(numeric_only=True)
# Heatmap of the correlations with the default styling.
sns.heatmap(tips.corr(numeric_only=True))
# Same heatmap, customized: annotated cells, "coolwarm" colormap, thick black
# cell borders, color range fixed to 0.5-1 and no colorbar.
sns.heatmap(
    tips.corr(numeric_only=True),
    annot=True,
    cmap="coolwarm",
    linewidths=5,
    linecolor="black",
    vmin=0.5,
    vmax=1,
    cbar=False,
);
Kernel Density Estimation (KDE)
# In statistics, kernel density estimation (KDE) is the application of kernel
# smoothing to probability density estimation: a non-parametric method that
# estimates the probability density function of a random variable, using
# kernels as weights.
sns.kdeplot(
    data=tips,
    x="total_bill",
);
Change chart type (kind)
print(tips.head(5))
# Example: figure-level functions choose the chart type through `kind`.
# A line plot is available from relplot with kind="line".
sns.relplot(
    data=tips,
    x="total_bill",
    y="tip",
    kind="line",
)
plt.show()
# An ECDF (empirical cumulative distribution) plot is available from displot
# with kind="ecdf".
sns.displot(
    data=tips,
    x="tip",
    kind="ecdf",
)
plt.show()
Remove Legend, change palette & transparency
# hue="sex" would normally add a legend for sex; legend=False removes it.
# alpha=0.5 changes the transparency of the density lines.
sns.displot(
    data=tips,
    x="total_bill",
    hue="sex",
    kind="kde",
    legend=False,
    palette="dark",
    alpha=0.5,
)
plt.show()
Group by (hue)
# Reuse the tips data loaded in the lesson above and preview it.
print(tips.head(5))
# hue="sex" colors each point according to the sex column.
sns.scatterplot(
    data=tips,
    x="total_bill",
    y="tip",
    hue="sex",
)
plt.show()
Multiple charts
import seaborn as sns
import matplotlib.pyplot as plt
# Dataset used by the multiple-chart examples below.
tips = sns.load_dataset("tips")
tips.head(2)
Combine charts (overlapping)
# Layer 1: box plot of total_bill per day, split by sex.
sns.boxplot(
    data=tips,
    x="day",
    y="total_bill",
    hue="sex",
    dodge=True,
)
# Layer 2: swarm plot of the individual observations.
# dodge=True makes the swarm plot segment its points by sex as well.
sns.swarmplot(
    data=tips,
    x="day",
    y="total_bill",
    hue="sex",
    palette="dark:0",
    dodge=True,
)
plt.show()
One next to the other
# col="time" draws one scatter panel per value of the time column, side by side.
sns.relplot(
    data=tips,
    x="total_bill",
    y="tip",
    hue="day",
    kind="scatter",
    col="time",
);
Jointplot
import seaborn as sns
import matplotlib.pyplot as plt
# Load the data for the joint plot examples.
tips = sns.load_dataset("tips")
tips.head()
# Joint plot: `kind` selects the type of the main (central) chart.
sns.jointplot(
    data=tips,
    x="total_bill",
    y="tip",
    hue="sex",
    kind="scatter",
);
marginal_ticks
# Same joint plot with a KDE main chart and marginal_ticks switched on.
sns.jointplot(
    data=tips,
    x="total_bill",
    y="tip",
    hue="sex",
    kind="kde",
    marginal_ticks=True,
);
marginal_kws
# marginal_ticks=True shows the ticks on the count/density axis of the
# marginal (outer) plots.
# marginal_kws holds arguments that only affect the marginal plots.
marginal_options = dict(bins=25, fill=True, multiple="dodge")
sns.jointplot(
    data=tips,
    x="total_bill",
    y="tip",
    hue="sex",
    kind="hist",
    marginal_ticks=True,
    marginal_kws=marginal_options,
)
Modify style, palette & font (Set)
Modify size
# Create the figure first so its size can be chosen; figsize is
# (width, height) in inches.
plt.figure(figsize=(1, 1))
# Apply seaborn's default theme.
sns.set()
sns.barplot(
    x=["A", "B", "C"],
    y=[1, 3, 2],
)
plt.show()
Set (modify style, palette & font)
# sns.set() changes the theme for the plots drawn after it: here a dark
# style, the "Spectral" palette and a font scale of 3.
sns.set(
    style="dark",
    palette="Spectral",
    font_scale=3,
)
sns.barplot(
    x=["A", "B", "C"],
    y=[1, 3, 2],
)
plt.show()
# Calling sns.set() without arguments applies the seaborn defaults again.
sns.set()
sns.barplot(
    x=["A", "B", "C"],
    y=[1, 3, 2],
)
plt.show()
Seaborn color palettes
# Nine colors from the "husl" palette.
sns.color_palette("husl", 9)
# as_cmap=True returns a matplotlib colormap instead of a list of colors.
sns.color_palette("Spectral", as_cmap=True)
# "dark:<color>" palette built from the color #5A9; the "_r" suffix reverses it.
sns.color_palette("dark:#5A9_r", as_cmap=True)
# A named seaborn palette.
sns.color_palette("pastel")
Seaborn themes
Save your chart as a png
Chart customization
import seaborn as sns
import matplotlib.pyplot as plt
# Dataset used by the chart customization examples below.
tipsdata = sns.load_dataset("tips")
tipsdata.head()
Cumulative charts
# cumulative=True makes each bar include the counts of all previous bins.
sns.histplot(
    data=tipsdata,
    x="tip",
    bins=15,
    cumulative=True,
)
plt.show()
Statistic charts
# `stat` selects the statistic computed for each bin.
# First chart: density.
sns.histplot(
    data=tipsdata,
    x="tip",
    bins=15,
    hue="sex",
    stat="density",
)
plt.show()
# Second chart: frequency.
sns.histplot(
    data=tipsdata,
    x="tip",
    bins=15,
    hue="sex",
    stat="frequency",
)
plt.show()
Chart grouping
# `multiple` controls how the hue groups share each bin.
# First plot: bars stacked on top of each other.
sns.histplot(
    data=tipsdata,
    x="tip",
    bins=15,
    hue="sex",
    multiple="stack",
)
plt.show()
# Second plot: bars placed next to each other (dodge).
sns.histplot(
    data=tipsdata,
    x="tip",
    bins=15,
    hue="sex",
    multiple="dodge",
)
plt.show()
Area below the curve
# KDE per sex; fill=True shades the area below each density curve.
sns.kdeplot(
    data=tipsdata,
    x="tip",
    hue="sex",
    fill=True,
)
plt.show()
Chart types for categorical data
import seaborn as sns
import matplotlib.pyplot as plt
# The categorical variables in this dataset are "sex", "smoker", "day", "time".
tips = sns.load_dataset("tips")
tips.head(2)
Catplot
# Box plots of total_bill per day, split by sex, with one panel per value of
# the smoker column.
sns.catplot(
    data=tips,
    x="day",
    y="total_bill",
    hue="sex",
    dodge=True,
    kind="box",
    col="smoker",
)
plt.show()
Bar plot (count)
# Number of rows per day, with separate bars for each sex.
sns.countplot(
    data=tips,
    x="day",
    hue="sex",
);
swarm plot (dot diagram)
# Without dodge, the points of both sexes share the same position for each day.
sns.swarmplot(
    data=tips,
    x="day",
    y="total_bill",
    hue="sex",
);
# dodge=True fixes the issue of one category being drawn over the other.
sns.swarmplot(
    data=tips,
    x="day",
    y="total_bill",
    hue="sex",
    dodge=True,
);
stripplot
# Strip plot of total_bill per day, with the sexes separated by dodge=True.
sns.stripplot(
    data=tips,
    x="day",
    y="total_bill",
    hue="sex",
    dodge=True,
);
boxplot separated categories
# showfliers is spelled out so it is easy to switch: True draws the outlier
# points, False removes them from the chart.
sns.boxplot(
    data=tips,
    x="day",
    y="total_bill",
    hue="sex",
    showfliers=True,
)
plt.show()
violin plot
# Violin plot of total_bill for each day.
sns.violinplot(
    data=tips,
    x="day",
    y="total_bill",
)
plt.show()
Boxplot + Swarmplot
# Layer 1: box plot of total_bill per day, split by sex.
sns.boxplot(
    data=tips,
    x="day",
    y="total_bill",
    hue="sex",
    dodge=True,
)
# Layer 2: swarm plot of the individual observations, using "<" markers.
# dodge=True makes the swarm plot segment its points by sex as well.
sns.swarmplot(
    data=tips,
    x="day",
    y="total_bill",
    hue="sex",
    palette="dark:0",
    dodge=True,
    marker="<",
)
plt.show()
Correlation charts
import seaborn as sns
import matplotlib.pyplot as plt
# Dataset used by the correlation examples below.
tips = sns.load_dataset("tips")
tips.head(2)
correlation by categories
# Tip against total_bill, with one pastel color per day.
sns.scatterplot(
    data=tips,
    x="total_bill",
    y="tip",
    hue="day",
    palette="pastel",
);
lm plot with multiple categories
# One linear model per day on the tips data.
sns.lmplot(
    data=tips,
    x="total_bill",
    y="tip",
    hue="day",
    palette="dark",
);
# Same idea on the iris example dataset: one linear model per species.
iris = sns.load_dataset("iris")
iris.head()
sns.lmplot(
    data=iris,
    x="sepal_length",
    y="petal_length",
    hue="species",
);
second segmentation in legend
change dot shape
# style="time" adds a second segmentation: the marker shape depends on the
# time column, while the color still depends on day.
sns.scatterplot(
    data=tips,
    x="total_bill",
    y="tip",
    hue="day",
    palette="pastel",
    style="time",
);
change dot shape (choosing the shapes yourself)
# Map each value of the time column to the marker that should represent it.
shapes = {
    "Lunch": "D",
    "Dinner": "s",
}
sns.scatterplot(
    data=tips,
    x="total_bill",
    y="tip",
    hue="day",
    palette="pastel",
    style="time",
    markers=shapes,
);
change dot size based on numerical variable
# size="size" scales each dot by the numeric "size" column of the data.
sns.scatterplot(
    data=tips,
    x="total_bill",
    y="tip",
    hue="day",
    palette="pastel",
    size="size",
)
plt.show()
Multiple correlation charts
# Same scatter as a figure-level plot: col="time" gives one panel per value
# of the time column.
sns.relplot(
    data=tips,
    x="total_bill",
    y="tip",
    hue="day",
    palette="pastel",
    size="size",
    col="time",
    kind="scatter",
)
plt.show()
Move the legend (relocate)
# Use a bigger figure so there is room for the chart.
plt.figure(figsize=(8, 8))
sns.scatterplot(
    data=tips,
    x="total_bill",
    y="tip",
    hue="day",
    style="time",
    size="size",
)
# bbox_to_anchor is (x position, y position); loc says which part of the
# legend box is placed at that point.
plt.legend(
    loc="center",
    bbox_to_anchor=(1.2, 0.5),
)
plt.show()
Pairplot (correlation among all the variables)
# See the correlation among the numeric variables: shows how the variables
# are correlated with each other.
# numeric_only=True leaves out the categorical columns (sex, smoker, day,
# time); pandas >= 2.0 raises on them instead of silently dropping them.
tips.corr(numeric_only=True)
# Pair plot: a grid of pairwise charts for the variables of the dataset.
sns.pairplot(data=tips)
plt.show()
Pairplot + diag_kind + hue
# Pair plot of the iris data, colored by species; diag_kind="kde" selects
# KDE charts for the diagonal.
iris = sns.load_dataset("iris")
iris.head()
sns.pairplot(
    data=iris,
    hue="species",
    palette="inferno",
    diag_kind="kde",
);
Pairplot corner
# corner=True keeps only the lower triangle of the pair plot grid.
sns.pairplot(
    data=tips,
    corner=True,
);
Line charts
#loading our data
import seaborn as sns
import matplotlib.pyplot as plt
# Load the data for the line chart examples.
tips = sns.load_dataset("tips")
tips.head(2)
# Line plot of tip against total_bill: color by time, line width by size and
# line style by day.
sns.lineplot(
    data=tips,
    x="total_bill",
    y="tip",
    hue="time",
    size="size",
    style="day",
);
relplot
# The same line chart drawn through the figure-level relplot with kind="line".
sns.relplot(
    data=tips,
    x="total_bill",
    y="tip",
    hue="time",
    style="day",
    size="size",
    kind="line",
);