import pandas as pd
# Load the car listings from disk into a DataFrame.
CARS_CSV = 'data/cars.csv'
data = pd.read_csv(filepath_or_buffer=CARS_CSV)
data
# Show the dtype pandas assigned to each column.
column_types = data.dtypes
print(column_types)
manufacturer_name object
model_name object
transmission object
color object
odometer_value int64
year_produced int64
engine_fuel object
engine_has_gas bool
engine_type object
engine_capacity float64
body_type object
has_warranty bool
state object
drivetrain object
price_usd float64
is_exchangeable bool
location_region object
number_of_photos int64
up_counter int64
feature_0 bool
feature_1 bool
feature_2 bool
feature_3 bool
feature_4 bool
feature_5 bool
feature_6 bool
feature_7 bool
feature_8 bool
feature_9 bool
duration_listed int64
dtype: object
# Summary statistics for the numeric columns.
data.describe()
# Mean of the price column (USD).
usd_prices = data['price_usd']
price_mean = usd_prices.mean()
print(f'Media del precio USD {price_mean}')
Media del precio USD 6639.971021255613
# Median of the price column (USD).
price_column = data['price_usd']
price_median = price_column.median()
print(f'Mediana del precio USD {price_median}')
Mediana del precio USD 4800.0
import matplotlib.pyplot as plt
# Default figure size shipped with Matplotlib, as (width, height).
plt.rcParamsDefault['figure.figsize']
# Plot styling: enlarge every figure relative to the default size and
# switch to the seaborn "darkgrid" look.
scale_x = 2
scale_y = 1.5
# Read the defaults instead of hard-coding 6.4 x 4.8.
default_width, default_height = plt.rcParamsDefault['figure.figsize']
plt.rcParams['figure.figsize'] = (default_width * scale_x, default_height * scale_y)
# Matplotlib 3.6 renamed the bundled seaborn styles to "seaborn-v0_8-*" and
# 3.8 removed the old names, so use whichever name this installation ships.
style_name = 'seaborn-darkgrid'
if style_name not in plt.style.available:
    style_name = 'seaborn-v0_8-darkgrid'
plt.style.use(style_name)
# Draw a 20-bin histogram of the prices and label it through the Axes
# object that pandas returns.
price_axes = data['price_usd'].plot(kind='hist', bins=20)
price_axes.set_title('Histograma de precio USD')
price_axes.set_xlabel('Precio en USD')
price_axes.set_ylabel('Frecuencia')
plt.show()
import seaborn as sns
# Price histogram coloured by manufacturer.
# With the default loc="best", Matplotlib searches for a free legend spot at
# draw time and emits "Creating legend with loc="best" can be slow with large
# amounts of data". Pin the legend location while seaborn creates the legend
# so that search is skipped.
_, ax = plt.subplots()
with plt.rc_context({'legend.loc': 'upper right'}):
    sns.histplot(ax=ax, data=data, x='price_usd', hue='manufacturer_name')
plt.show()
/shared-libs/python3.7/py-core/lib/python3.7/site-packages/IPython/core/pylabtools.py:137: UserWarning: Creating legend with loc="best" can be slow with large amounts of data.
fig.canvas.print_figure(bytes_io, **kw)
# Price histograms split by engine type, one figure per display mode:
# first seaborn's default 'layer' mode, then 'stack'.
for stacking_mode in ('layer', 'stack'):
    _, ax = plt.subplots()
    sns.histplot(
        data=data,
        x='price_usd',
        hue='engine_type',
        multiple=stacking_mode,
        ax=ax,
    )
    plt.show()
# Group the rows by engine type and count the entries in each column.
data.groupby(by='engine_type').count()
# Keep only the rows whose manufacturer is 'Audi' and whose model is 'Q7'.
is_audi = data['manufacturer_name'] == 'Audi'
is_q7 = data['model_name'] == 'Q7'
Q7_data = data[is_audi & is_q7]
Q7_data
# Price histogram of that subset, coloured by production year.
_, ax = plt.subplots()
sns.histplot(data=Q7_data, x='price_usd', hue='year_produced', ax=ax)
plt.show()