import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# add any additional libraries you need
# Remote locations of the red and white wine-quality CSV files.
url_wine_red = 'https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv'
url_wine_white = 'https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-white.csv'

# Read both files using ';' as the field separator.
red, white = (
    pd.read_csv(source_url, sep=';')
    for source_url in (url_wine_red, url_wine_white)
)

# Notebook-style previews of the first rows of each frame.
red.head()
white.head()
# Tag every row with its wine colour so the origin survives the merge.
red['category'] = 'red'
white['category'] = 'white'

# Stack the red rows on top of the white rows with a fresh 0..n-1 index.
# pd.concat is used because DataFrame.append was deprecated in pandas 1.4
# and removed in pandas 2.0.
total_wine = pd.concat([red, white], ignore_index=True)

# Notebook-style inspection of the combined frame.
total_wine.head()
total_wine.shape
total_wine.info()
total_wine.describe()
# Quality score column, reused by the distribution plots.
quality = total_wine['quality']
quality

# Give each plot its own figure: without an explicit figure both calls draw
# on the same implicit "current axes" whenever this code runs outside of
# separate notebook cells, overlaying a numeric histogram on a categorical
# count plot.
plt.figure()
sns.histplot(quality)

plt.figure()
sns.countplot(data=total_wine, x=quality)
# Bucket the numeric quality score into three labels:
#   score < 5        -> "Poor"
#   5 <= score <= 7  -> "Medium"
#   anything else    -> "High"
# np.select evaluates the conditions in order, so the second test only
# applies to scores that are not already "Poor".
_quality_scores = total_wine['quality']
_quality_labels = np.select(
    [_quality_scores < 5, _quality_scores <= 7],
    ['Poor', 'Medium'],  # "Medium" previously carried a stray trailing space
    default='High',
)
total_wine.tail()

# Store the labels as a categorical with an explicit Poor < Medium < High
# order so plots and group-bys list the buckets in a meaningful sequence
# instead of alphabetically.
total_wine['quality_category'] = pd.Categorical(
    _quality_labels,
    categories=['Poor', 'Medium', 'High'],
    ordered=True,
)
total_wine.info()
# Pass the plotted variable as `x=` explicitly: in seaborn >= 0.12 the first
# positional parameter of countplot/boxplot is `data`, so a positional Series
# is no longer interpreted as the x variable.
# Each plot gets its own figure so the two do not overdraw each other when
# this code runs outside of separate notebook cells.
plt.figure()
sns.countplot(x=total_wine['quality_category'])

plt.figure()
sns.boxplot(x=quality)
# Pairwise correlations between the numeric columns only.  The frame also
# holds string/categorical columns ('category', 'quality_category');
# pandas >= 2.0 raises on those instead of silently dropping them, so they
# are filtered out explicitly (this works on older pandas as well).
wine_corr = total_wine.select_dtypes(include='number').corr()
wine_corr

plt.figure(figsize=(10, 10))
sns.heatmap(wine_corr, annot=True, cmap='PuBu')

# Correlation of every numeric column with 'quality', strongest positive
# first.  Reuses the matrix computed above instead of recomputing it.
# NOTE: the variable name (sic) is kept unchanged in case later code refers
# to it.
totoal_wine_sort = wine_corr[['quality']].sort_values(by='quality', ascending=False)
totoal_wine_sort
# Scatter grid of selected features, coloured by the numeric quality score.
# Columns of the grid are the x variables, rows are the y variables.
_pair_x_columns = ["density", "volatile acidity", "chlorides"]
_pair_y_columns = ["alcohol", "citric acid"]
sns.pairplot(
    total_wine,
    x_vars=_pair_x_columns,
    y_vars=_pair_y_columns,
    hue='quality',
    kind='scatter',
);
def _plot_feature_by_quality(data, feature, ylim):
    """Draw a bar plot and a violin plot of *feature* per quality category.

    Both panels are placed side by side in one new figure and are given the
    same y-axis limits so they can be compared directly.

    Args:
        data: DataFrame containing a 'quality_category' column and *feature*.
        feature: Name of the column plotted on the y-axis.
        ylim: ``(lower, upper)`` y-axis limits applied to both panels.

    Returns:
        A ``(figure, (bar_axes, violin_axes))`` tuple.
    """
    figure, (bar_ax, violin_ax) = plt.subplots(nrows=1, ncols=2)
    sns.barplot(data=data, x='quality_category', y=feature, ax=bar_ax)
    sns.violinplot(data=data, x='quality_category', y=feature, ax=violin_ax)
    for axis in (bar_ax, violin_ax):
        axis.set_ylim(*ylim)
    figure.tight_layout()
    return figure, (bar_ax, violin_ax)


# Hand-picked y-axis limits for each feature compared across quality
# categories.
_FEATURE_YLIMS = {
    'density': (0.98, 1.05),
    'volatile acidity': (-0.14, 1.83),
    'chlorides': (-0.05, 0.675),
    'alcohol': (6.93, 15.71),
    # The violin panel for citric acid previously did not receive these
    # limits, unlike every other feature; both panels now share them.
    'citric acid': (-0.21, 1.80),
}

# One figure per feature.  fig/ax1/ax2 stay bound at module level (to the
# last figure drawn), as they were before.
for _feature, _ylim in _FEATURE_YLIMS.items():
    fig, (ax1, ax2) = _plot_feature_by_quality(total_wine, _feature, _ylim)