import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Locations of the semicolon-separated red and white wine-quality CSV files.
url_wine_red = 'https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv'
url_wine_white = 'https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-white.csv'

# Read both files with the same parser settings, red first and then white.
red, white = (
    pd.read_csv(source_url, sep=";")
    for source_url in (url_wine_red, url_wine_white)
)

# Preview the first five rows of each frame (only rendered when run as a
# notebook cell; as a plain script these expressions are discarded).
red.head()
white.head()
# Tag every row with its wine colour so the two data sets remain
# distinguishable once they are stacked into a single frame.
red['category'] = 'red'
white['category'] = 'white'

# DataFrame.append was removed in pandas 2.0; pd.concat is the supported
# replacement. ignore_index=True renumbers the rows 0..n-1, exactly as the
# original append(..., ignore_index=True) call did.
total_wine = pd.concat([red, white], ignore_index=True)
total_wine.head(5)
# NOTE: The chart exported from this notebook cell is empty.
# The chart was probably not set up properly in the notebook.
# Dimensions of the combined frame, reported as row and column counts.
row_count, column_count = np.shape(total_wine)
print(f"N° de Filas: {row_count}")
print(f"N° de Columnas: {column_count}")

# Column dtypes / non-null counts, followed by summary statistics.
total_wine.info()
total_wine.describe()

# Look at the 'quality' column on its own.
quality_analysis = total_wine['quality']
quality_analysis.describe()

# Number of non-null entries per column for each quality score.
total_wine.groupby('quality').count()
# Bar chart of how many wines received each quality score.
sns.set_theme(style='darkgrid')
# Name the column explicitly: recent seaborn releases accept only `data`
# positionally, so a bare Series is no longer interpreted as the x variable.
sns.countplot(x='quality', data=total_wine)
plt.show()
# Bucket the numeric quality score into three labelled bands:
#   quality <= 3      -> 'Poor'
#   3 < quality <= 6  -> 'Medium'
#   quality > 6       -> 'High'   (the original label was misspelled 'Hihg')
# pd.cut uses right-closed intervals by default, which reproduces the
# original `<=` thresholds, and it returns a categorical column whose
# categories are ordered Poor < Medium < High, so plots list the bands in
# that order rather than alphabetically.
total_wine['quality_category'] = pd.cut(
    total_wine['quality'],
    bins=[-np.inf, 3, 6, np.inf],
    labels=['Poor', 'Medium', 'High'],
)
total_wine.tail()

# The new column is already of dtype 'category'; info() confirms it.
total_wine.info()

# Count of wines per quality band, one panel per wine colour.
sns.set_theme(style='darkgrid')
sns.catplot(x="quality_category", col="category", kind="count", data=total_wine)
plt.show()
# One box plot per numeric column, laid out on a 3 x 4 grid.
figure = plt.figure(figsize=(20, 13))
cols = [
    'fixed acidity', 'volatile acidity', 'citric acid', 'residual sugar',
    'chlorides', 'free sulfur dioxide', 'total sulfur dioxide', 'density',
    'pH', 'sulphates', 'alcohol', 'quality',
]
# Subplot positions are 1-based, hence start=1.
for position, col in enumerate(cols, start=1):
    ax = figure.add_subplot(3, 4, position)
    # Pass the values as `x=` (horizontal box) and target the subplot
    # explicitly instead of relying on a positional argument and the
    # implicit "current axes".
    sns.boxplot(x=total_wine[col], ax=ax)
plt.show()
# Correlations are only defined for numeric columns. The frame also holds
# the string column 'category' (and possibly the categorical
# 'quality_category'), which makes DataFrame.corr raise on pandas >= 2.0,
# so restrict the input to numeric dtypes first.
numeric_wine = total_wine.select_dtypes(include='number')

# Spearman rank correlation between every pair of numeric columns.
matrix_corr = numeric_wine.corr(method='spearman')
matrix_corr.head(12)

# Annotated heat map of the Spearman matrix, colour scale centred on 0.
plt.figure(figsize=(20, 12))
sns.heatmap(
    matrix_corr,
    xticklabels=matrix_corr.columns,
    yticklabels=matrix_corr.columns,
    center=0,
    annot=True,
)
plt.show()

# Default (Pearson) correlation of every numeric column with 'quality',
# strongest positive correlation first.
numeric_wine.corr()[['quality']].sort_values(by='quality', ascending=False)
# Narrow the frame down to a handful of columns plus the quality fields.
total_wine_mini = total_wine[['alcohol', 'density', 'chlorides', 'volatile acidity', 'quality', 'quality_category']]
total_wine_mini.head()

# 'quality_category' is not numeric and would make DataFrame.corr raise on
# pandas >= 2.0, so compute the correlation over the numeric columns only.
total_wine_miniCorr = total_wine_mini.select_dtypes(include='number').corr()
total_wine_miniCorr

# Draw the annotated heat map on a 12 x 7 figure, then switch the seaborn
# style to 'ticks' before showing it (same call order as before).
sns.set(rc={'figure.figsize': (12, 7)})
sns.heatmap(data=total_wine_miniCorr, annot=True)
sns.set_theme(style='ticks')
plt.show()
def _plot_feature_by_quality(feature, bar_ylim, violin_ylim):
    """Show a bar plot and a violin plot of *feature* per quality band.

    Both plots read from the module-level ``total_wine`` frame and use its
    'quality_category' column on the x axis.

    Args:
        feature: Name of the ``total_wine`` column drawn on the y axis.
        bar_ylim: ``(bottom, top)`` y-axis limits for the bar plot.
        violin_ylim: ``(bottom, top)`` y-axis limits for the violin plot.

    Returns:
        The figure and its two axes as ``(fig, bar_ax, violin_ax)``.
    """
    # Apply the theme BEFORE creating the figure: style settings only take
    # effect on axes created afterwards, so setting it after plt.subplots
    # would leave this figure in whatever style was active previously.
    sns.set_theme(style='darkgrid')
    fig, (bar_ax, violin_ax) = plt.subplots(nrows=1, ncols=2)

    sns.barplot(data=total_wine, x='quality_category', y=feature, ax=bar_ax)
    bar_ax.set_ylim(*bar_ylim)

    sns.violinplot(data=total_wine, x='quality_category', y=feature, ax=violin_ax)
    violin_ax.set_ylim(*violin_ylim)

    plt.show()
    return fig, bar_ax, violin_ax


# (feature, bar-plot y limits, violin-plot y limits) for each figure, in the
# order the figures are shown.
_FEATURE_PLOTS = (
    ('alcohol', (0, 20), (6, 18)),
    ('density', (0, 2.5), (0.98, 1.05)),
    ('chlorides', (0, 0.18), (-0.1, 0.7)),
    ('volatile acidity', (0, 1.4), (-0.5, 2.5)),
)

for _feature, _bar_ylim, _violin_ylim in _FEATURE_PLOTS:
    # fig / ax1 / ax2 stay bound at module level to the most recent figure,
    # as they were when each figure was written out as a separate cell.
    fig, ax1, ax2 = _plot_feature_by_quality(_feature, _bar_ylim, _violin_ylim)