import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
import urllib.request as Req
from zipfile import ZipFile
import plotly.graph_objects as go
import plotly.express as px
# import plotly.offline as pyo
# pyo.init_notebook_mode()
# Download the zipped Pokemon dataset and load the pokedex CSV into `df`.
url = r'https://import.cdn.thinkific.com/220744/courses/1648061/pokemon_dataset-220322-181028.zip'
Req.urlretrieve(url, r'pokemon_dataset.zip')
# Context managers close both the archive and the member stream as soon as the
# CSV has been parsed; previously both handles were left open.
# NOTE: `pokemon_zip` and `archivo` stay bound afterwards but are closed.
with ZipFile(r'pokemon_dataset.zip') as pokemon_zip:
    pokemon_zip.filelist  # members contained in the archive
    with pokemon_zip.open('pokedex_mastermind.csv') as archivo:
        # The first CSV column becomes the DataFrame index.
        df = pd.read_csv(archivo, index_col=[0])
df
# Exploratory look at the loaded DataFrame. Every statement here is a bare
# expression meant to be displayed as notebook output; none of them modifies
# `df`.
# NOTE(review): `.median` is referenced without being called, so this only
# evaluates to the bound method -- confirm what was intended here.
df['name'].median
# Check whether any row is a full duplicate of another one.
df.duplicated().any()
df.duplicated().unique()
df.columns
# Peek at the first rows, the last rows and a random sample of 5 rows.
df.head()
df.tail()
df.sample(5)
# Preview a subset of the stat and type-effectiveness columns.
df[['defense', 'sp_attack',
'sp_defense', 'speed', 'catch_rate', 'percentage_male',
'against_normal', 'against_fire', 'against_water', 'against_electric',
'against_grass', 'against_ice', 'against_fight', 'against_poison']].head()
df.columns
# Move the 18 type-effectiveness ("against_*") columns out of `df` into their
# own frame, `weakness`, with the "against_" prefix stripped from the names.
cols = [
    'against_normal', 'against_fire', 'against_water', 'against_electric',
    'against_grass', 'against_ice', 'against_fight', 'against_poison',
    'against_ground', 'against_flying', 'against_psychic', 'against_bug',
    'against_rock', 'against_ghost', 'against_dragon', 'against_dark',
    'against_steel', 'against_fairy',
]
weakness = df.loc[:, cols]
weakness.columns
weakness = weakness.rename(
    columns=lambda nombre: nombre.replace('against_', ''))
weakness.head()
# The same columns are removed from the main frame in place.
df.drop(columns=cols, inplace=True)
df.head()
# Inspect the object-dtype (text) columns before normalising them.
df.select_dtypes(include=[object])
df.select_dtypes(include=[object]).columns
cols = ['status', 'type_1', 'type_2']
# A single column could be lower-cased with df['status'].str.lower(), but the
# .str accessor works on one column at a time, so each column is handled in
# turn.
for columna in cols:
    df[columna] = df[columna].str.lower()
df.select_dtypes(include=[object])
df['percentage_male'].unique()
# Strip the '%' sign and convert the remaining text to float.
sin_simbolo = df['percentage_male'].str.replace('%', '')
df['percentage_male'] = sin_simbolo.apply(float)
df['percentage_male'].unique()
# Multiply 'weight_pounds' by 0.453592 (pounds -> kilograms) and round to two
# decimals. The result is staged in a temporary 'weight_kg' column, written
# back over 'weight_pounds', and that column is then renamed to 'weight_kg'.
df['weight_kg'] = (df['weight_pounds'] * 0.453592).round(2)
df['weight_kg']
# pop() removes the temporary column and hands back its values in one step.
df['weight_pounds'] = df.pop('weight_kg')
df
df.rename(columns={'weight_pounds': 'weight_kg'}, inplace=True)
df
# Clean up the names of "Mega" forms.
df['name'][:50]
# Boolean mask of the rows whose name contains the text 'Mega'.
es_mega = df['name'].str.contains('Mega')
es_mega
df[es_mega]
df[es_mega]['name']
len(df[es_mega]['name'])
nombres = df.loc[es_mega, 'name']
nombres[3]
# Greedy match from the start of the string up to (but not including) the
# last 'Mega'; replacing it with '' leaves the name starting at 'Mega'.
pat = '^.*(?=(Mega))'
nombres.str.replace(pat, '', regex=True)
df.head()
# Names without 'Mega' do not match the pattern and are left unchanged.
nombres_limpios = df['name'].str.replace(pat, '', regex=True)
nombres_limpios
df['name'] = nombres_limpios
df.head()
# Look for missing values, then fill the missing secondary type.
df.describe()
df.head()
df.isna()
df.isna().any()
df['type_2'].isna()
df[df['type_2'].isna()]
len(df[df['type_2'].isna()])
df['type_2'].unique()
# Assign the filled column back instead of calling fillna(inplace=True) on
# df['type_2']: the in-place call operates on an intermediate Series, which
# pandas warns about and which does not update `df` under Copy-on-Write.
df['type_2'] = df['type_2'].fillna('none')
# Fill missing catch rates with the mean catch rate of the Pokemon sharing
# the same status ('normal' or 'legendary'); rows with any other status are
# not touched by this step.
df[df['catch_rate'].isna()]
media_normales = df.loc[df['status'] == 'normal', 'catch_rate'].mean()
media_legendarios = df.loc[df['status'] == 'legendary', 'catch_rate'].mean()
df.describe()
df.describe()['catch_rate']
print(f'Media de atrapabilidad de Pokemon normales es {media_normales}')
print(f'Media de atrapabilidad de Pokemon legendarios es {media_legendarios}')
sin_datos = df['catch_rate'].isna()
son_legendarios = df['status'] == 'legendary'
son_normales = df['status'] == 'normal'
# Both means were computed before any value is filled in.
rellenos = (
    (son_legendarios, media_legendarios),
    (son_normales, media_normales),
)
for mascara, media in rellenos:
    df.loc[sin_datos & mascara, 'catch_rate'] = media
df['catch_rate'].isna().any()
df['catch_rate'] = df['catch_rate'].round(2)
df.isna().any()
# Set the weight of 'Eternatus Eternamax' by hand to 950.
df[df['weight_kg'].isna()]
peso = 950
pokemon = df['name'] == 'Eternatus Eternamax'
df.loc[pokemon, 'weight_kg'] = peso
df.loc[pokemon]
df.isna().any()
df.info()
# Count Pokemon per (primary type, secondary type) pair and draw a heatmap.
df['type_1'].value_counts()
pd.pivot_table(df,
               index='type_1')
# Rows are type_1, columns are type_2; each cell holds the number of distinct
# names for that pair (0 where the pair never occurs). The table is built
# once and reused for both the display and the plot.
contar_tipos = pd.pivot_table(df,
                              index='type_1',
                              columns='type_2',
                              values='name',
                              aggfunc=lambda x: x.value_counts().count(),
                              fill_value=0)
contar_tipos
fig = plt.figure(figsize=(18, 15))
g = sns.heatmap(contar_tipos,
                cmap='coolwarm',
                annot=True,
                center=25)
g.set_title('Cantidad de pokemon por tipo', fontsize=20)
# The heatmap puts the frame's columns (type_2) on the x axis and its index
# (type_1) on the y axis; the labels were previously swapped.
g.set_xlabel('Tipo 2', fontsize=15)
g.set_ylabel('Tipo 1', fontsize=15)
plt.show()
# Legendary Pokemon and how many of them belong to each generation.
legendarios = df[df['status'] == 'legendary']
legendarios
legendarios['generation'].value_counts()
# Horizontal count plot of every Pokemon whose status is not 'normal',
# one bar per generation and status.
no_normales = df[df['status'] != 'normal']
g = sns.catplot(
    data=no_normales,
    y='generation',
    kind='count',
    hue='status',
    palette='coolwarm',
    edgecolor='black',
    alpha=0.8,
)
g.fig.set_size_inches(16, 8)
g.set(xlim=(0, 18))
g.fig.suptitle('Cantidad de pokemon legendarios por generacion')
plt.show()
df.columns
stat_columns = ['hp', 'attack', 'defense', 'sp_attack',
                'sp_defense', 'speed']
# 'total' is the row-wise sum of the six base stats.
df['total'] = df[stat_columns].sum(axis=1)
df
# Distribution of 'total' per generation.
g = sns.catplot(data=df,
                x='generation',
                y='total',
                kind='box',
                palette='Set2')
g.fig.set_size_inches(16, 8)
g.fig.suptitle('Poder de los pokemon por generacion')
plt.show()
# Distribution of 'total' per primary type. The title used to be a copy of
# the per-generation one although the x axis is 'type_1'.
g = sns.catplot(data=df,
                x='type_1',
                y='total',
                kind='box',
                palette='Set2')
g.fig.set_size_inches(16, 8)
g.fig.suptitle('Poder de los pokemon por tipo')
plt.show()
def stats_medios(tipo, dataframe=None):
    """Return the mean base stats of the Pokemon whose primary type is `tipo`.

    Args:
        tipo: Value compared against the ``type_1`` column.
        dataframe: Frame to read from. When omitted, the module-level ``df``
            is used, so existing one-argument calls behave as before.

    Returns:
        A Series indexed by stat column name (hp, attack, defense, sp_attack,
        sp_defense, speed) holding the column means of the matching rows;
        every value is NaN when no row matches.
    """
    stat_columns = ['hp', 'attack', 'defense', 'sp_attack', 'sp_defense', 'speed']
    datos = df if dataframe is None else dataframe
    return datos.loc[datos['type_1'] == tipo, stat_columns].mean()
# Mean base stats of the water and fire primary types.
tipo_agua, tipo_fuego = (stats_medios(nombre_tipo)
                         for nombre_tipo in ('water', 'fire'))
def stats_pokemon(nombre, dataframe=None):
    """Return the base stats of the Pokemon called `nombre`.

    The previous body was a copy of the per-type helper: it ignored `nombre`
    and filtered ``type_1`` by an undefined name, so any call raised
    NameError. The lookup now matches the ``name`` column.

    Args:
        nombre: Value compared against the ``name`` column.
        dataframe: Frame to read from. When omitted, the module-level ``df``
            is used.

    Returns:
        A Series indexed by stat column name (hp, attack, defense, sp_attack,
        sp_defense, speed). Values are averaged over the matching rows, so a
        single match yields that Pokemon's stats as floats; every value is
        NaN when no row matches.
    """
    stat_columns = ['hp', 'attack', 'defense', 'sp_attack', 'sp_defense', 'speed']
    datos = df if dataframe is None else dataframe
    return datos.loc[datos['name'] == nombre, stat_columns].mean()
tipo_agua
# Radar chart with one filled trace per type: stat names around the circle,
# mean values as the radius.
fig = go.Figure()
for etiqueta, medias in (('water', tipo_agua), ('fire', tipo_fuego)):
    traza = go.Scatterpolar(
        r=medias.values,
        theta=medias.index,
        name=etiqueta,
        fill='toself',
    )
    fig.add_trace(traza)
fig.update_layout(
    title='Grafica comparativa stats',
    showlegend=True,
    polar=dict(radialaxis=dict(visible=False)),  # hide the radial axis
    font={'family': 'verdana', 'size': 16, 'color': 'white'},
    template='plotly_dark',
    height=500,
    width=900,
    legend_bgcolor='rgb(117, 112, 179)',
)
fig.show()
def comparar_stats(df, lista_tipos):
    """Show a radar chart comparing the mean base stats of several types.

    The means are computed from the `df` argument. Previously the function
    delegated to a helper that read the module-level frame, so the frame
    passed by the caller was silently ignored.

    Args:
        df: Frame with a ``type_1`` column and the six base-stat columns.
        lista_tipos: Iterable of ``type_1`` values; one trace is drawn per
            entry, in order.

    Returns:
        None. The figure is displayed with ``fig.show()``.
    """
    stat_columns = ['hp', 'attack', 'defense', 'sp_attack', 'sp_defense', 'speed']
    fig = go.Figure()
    for tipo in lista_tipos:
        # Column means of the rows whose primary type is `tipo`.
        stats = df.loc[df['type_1'] == tipo, stat_columns].mean()
        fig.add_trace(go.Scatterpolar(r=stats.values,
                                      theta=stats.index,
                                      name=tipo,
                                      fill='toself',
                                      opacity=0.5,
                                      hoveron='points',
                                      hovertemplate='%{theta}: %{r}'))
    fig.update_layout(showlegend=True,
                      polar={'radialaxis': {'visible': False}},
                      title='Grafica comparativa stats',
                      font=dict(family='verdana', size=16, color='white'),
                      template='plotly_dark',
                      height=500,
                      width=900,
                      legend_bgcolor='rgb(117, 112, 179)')
    fig.show()
# Compare the mean base stats of four primary types on one radar chart.
tipos = ['water', 'fire', 'poison', 'dragon']
comparar_stats(df, tipos)
def comparar_pokemons(dataframe, pokemon_list):
    """Show a radar chart comparing the base stats of the named Pokemon.

    One filled trace is added per entry of `pokemon_list`, built from the
    rows of `dataframe` whose ``name`` equals that entry. Nothing is
    returned; the figure is displayed with ``fig.show()``.

    Note: a second function with this same name is defined further down in
    the file and replaces this one once it has run.
    """
    stat_columns = ['hp', 'attack', 'defense', 'sp_attack', 'sp_defense','speed']
    fig = go.Figure()
    for pokemon in pokemon_list:
        coincide = dataframe['name'] == pokemon
        # squeeze() turns a single matching row into a Series of stats.
        datos = dataframe.loc[coincide, stat_columns].squeeze()
        traza = go.Scatterpolar(
            r=datos.values.tolist(),
            theta=datos.index.tolist(),
            name=pokemon,
            fill='toself',
            hoverinfo='text+theta+name',
        )
        fig.add_trace(traza)
    fig.update_layout(
        title=go.layout.Title(text='Comparacion Pokémon'),
        polar=dict(radialaxis=dict(visible=False)),
        showlegend=True,
        font={'family': 'verdana', 'size': 16, 'color': 'white'},
        template='plotly_dark',
        height=500,
        width=900,
        legend_bgcolor='rgb(117, 112, 179)',
    )
    fig.show()
# Compare three Pokemon by name with the radar chart helper.
pokemons = ['Rayquaza', 'Mimikyu', 'Pikachu']
comparar_pokemons(df, pokemons,
# A colour mapping argument is left disabled here:
#pokemon_colors
)
# Hex colour associated with each of the 18 Pokemon type names.
pokemon_colors = dict(
    normal='#A8A77A',
    fire='#EE8130',
    water='#6390F0',
    electric='#F7D02C',
    grass='#7AC74C',
    ice='#96D9D6',
    fighting='#C22E28',
    poison='#A33EA1',
    ground='#E2BF65',
    flying='#A98FF3',
    psychic='#F95587',
    bug='#A6B91A',
    rock='#B6A136',
    ghost='#735797',
    dragon='#6F35FC',
    dark='#705746',
    steel='#B7B7CE',
    fairy='#D685AD',
)
def comparar_pokemons(dataframe, pokemon_list):
    """Show a radar chart of the named Pokemon, coloured by primary type.

    Args:
        dataframe: Frame with ``name``, ``type_1`` and the six base-stat
            columns.
        pokemon_list: Iterable of names; one trace is drawn per entry.

    Returns:
        None. The figure is displayed with ``fig.show()``.

    Raises:
        ValueError: If a name is not present in ``dataframe['name']``.
            Previously this surfaced as an unrelated TypeError when the
            empty selection was used as a colour key.
        KeyError: If the Pokemon's ``type_1`` has no entry in the colour
            table.
    """
    colors = {'normal': '#A8A77A',
              'fire': '#EE8130',
              'water': '#6390F0',
              'electric': '#F7D02C',
              'grass': '#7AC74C',
              'ice': '#96D9D6',
              'fighting': '#C22E28',
              'poison': '#A33EA1',
              'ground': '#E2BF65',
              'flying': '#A98FF3',
              'psychic': '#F95587',
              'bug': '#A6B91A',
              'rock': '#B6A136',
              'ghost': '#735797',
              'dragon': '#6F35FC',
              'dark': '#705746',
              'steel': '#B7B7CE',
              'fairy': '#D685AD'}
    stat_columns = ['hp', 'attack', 'defense', 'sp_attack', 'sp_defense','speed']
    fig = go.Figure()
    for pokemon in pokemon_list:
        # Filter once per name instead of once per column group.
        coincidencias = dataframe.loc[dataframe['name'] == pokemon]
        if coincidencias.empty:
            raise ValueError(f'Pokemon not found in dataframe: {pokemon!r}')
        # Take the first match explicitly so a duplicated name cannot turn
        # the stats into a 2-D table.
        datos = coincidencias[stat_columns].iloc[0]
        pokecolor = coincidencias['type_1'].iloc[0]
        fig.add_trace(go.Scatterpolar(r=datos.values.tolist(),
                                      theta=datos.index.tolist(),
                                      name=pokemon,
                                      fill='toself',
                                      fillcolor=colors[pokecolor],
                                      marker_line_color=colors[pokecolor],
                                      opacity=0.7,
                                      hovertemplate="%{theta}: %{r}",
                                      hoveron="points",  # alternatives: "fills", "points+fills"
                                      ))
    fig.update_layout(title=go.layout.Title(text='Comparacion Pokémon por Stats'),
                      polar={'radialaxis': {'visible': False}},
                      showlegend=True,
                      font=dict(family='verdana', size=16, color='white'),
                      template='plotly_dark',
                      height=500,
                      width=900,
                      legend_bgcolor='rgb(117, 112, 179)')
    fig.show()
# Compare three Pokemon by name using the type-coloured radar chart.
pokemons = ['Squirtle', 'Charmander', 'Bulbasaur']
comparar_pokemons(df, pokemons)
df.head()
# Top 5 by height and by weight, indexed by name.
mas_altos = df.sort_values(by='height_m',
                           ascending=False)[['name', 'height_m']][:5].set_index('name')
mas_altos
mas_pesados = df.sort_values(by='weight_kg',
                             ascending=False)[['name', 'weight_kg']][:5].set_index('name')
mas_pesados
# Body mass index is mass divided by the SQUARE of the height (kg / m^2);
# the previous formula divided by the height only.
df['BMI'] = df['weight_kg'] / df['height_m'] ** 2
df['BMI']
# The 5 lowest BMI values (ascending sort).
bmi_mas_bajo = df.sort_values(by='BMI')[['name', 'BMI']][:5].set_index('name')
bmi_mas_bajo
# Each ranking is a one-column frame; squeeze() turns it into a Series
# indexed by name.
mas_pesados = mas_pesados.squeeze()
bmi_mas_bajo = bmi_mas_bajo.squeeze()
mas_altos = mas_altos.squeeze()
# Three side-by-side bar charts: tallest, heaviest and lowest-BMI Pokemon.
fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(18, 5))
paneles = (
    (ax1, mas_altos, 'Mas altos'),
    (ax2, mas_pesados, 'Mas pesados'),
    (ax3, bmi_mas_bajo, 'Menor BMI'),
)
for eje, serie, titulo in paneles:
    sns.barplot(ax=eje, x=serie.index, y=serie.values)
    eje.set_title(titulo)
    eje.set(xlabel=None)
    # Slanted, right-aligned name labels on the x axis.
    eje.set_xticklabels(serie.index,
                        rotation=45,
                        horizontalalignment='right')
plt.show()
# The female percentage is the complement of the male percentage.
df['percentage_female'] = 100 - df['percentage_male']
df
# Mean female / male percentage over the whole frame.
pokemon_hembra, pokemon_macho = (
    df[columna].mean()
    for columna in ('percentage_female', 'percentage_male')
)
pokemon_hembra
fig = px.pie(
    names=['Femeninos', 'Masculinos'],
    values=[pokemon_hembra, pokemon_macho],
    color_discrete_sequence=['#479B55', '#FA0087'],
)
fig.update_layout(
    title='% de pokemon por sexo',
    font={'family': 'verdana', 'size': 16, 'color': 'white'},
    template='plotly_dark',
    height=500,
    width=900,
    legend_bgcolor='rgb(117, 112, 179)',
)
fig.show()
df.query('attack > defense').head()
# End points of the diagonal y = x from (0, 0) to (200, 200); points above
# it have attack greater than defense.
linea = pd.DataFrame({'x': [0, 200], 'y': [0, 200]})
linea
# Scatter of attack against defense, coloured by primary type.
fig1 = px.scatter(
    df,
    x='defense',
    y='attack',
    color='type_1',
    hover_data=['name'],
    color_discrete_sequence=px.colors.qualitative.Dark24,
)
fig2 = px.line(linea, x='x', y='y')
fig2.update_traces(line_color='red')
# Combine the traces of both figures into a single one.
fig3 = go.Figure(data=fig1.data + fig2.data)
fig3.update_layout(
    title='Pokemon por ataque/defensa',
    xaxis_title='Defensa',
    yaxis_title='Ataque',
    font={'family': 'verdana', 'size': 16, 'color': 'white'},
    template='plotly_dark',
    height=500,
    width=900,
    legend_bgcolor='rgb(117, 112, 179)',
)
fig3.show()
# NOTE: the leading "!" is notebook (IPython) shell-escape syntax, not plain Python.
!pip install statsmodels==0.13.2 ### Deepnote asked for this in order to run the two charts that follow
# Layout settings shared by the two trendline charts below.
estilo_comun = dict(
    font=dict(family='verdana', size=16, color='white'),
    template='plotly_dark',
    height=500,
    width=900,
    legend_bgcolor='rgb(117, 112, 179)',
)
# Attack vs defense with marginal box plots and a single OLS trendline.
fig1 = px.scatter(
    df,
    x='attack',
    y='defense',
    hover_data=['name'],
    color_discrete_sequence=['#FA0087'],
    marginal_x='box',
    marginal_y='box',
    opacity=0.8,
    trendline='ols',
)
fig1.update_layout(
    title='Grafica de ataque/defensa con linea de tendencia general',
    **estilo_comun,
)
fig1.show()
# Same chart coloured by primary type.
fig2 = px.scatter(
    df,
    x='attack',
    y='defense',
    hover_data=['name'],
    color='type_1',
    color_discrete_sequence=px.colors.qualitative.Light24,
    marginal_x='box',
    marginal_y='box',
    opacity=0.8,
    trendline='ols',
)
fig2.update_layout(
    title='Grafica de ataque/defensa con linea de tendencia por tipo',
    **estilo_comun,
)
fig2.show()
# Numeric columns included in the correlation analysis.
columns_corr_high = [
    'hp', 'attack', 'defense',
    'sp_attack', 'sp_defense',
    'speed', 'catch_rate',
    'height_m', 'weight_kg', 'BMI',
]
# Pearson correlation matrix, computed once and reused below.
correlaciones = df[columns_corr_high].corr(method='pearson')
correlaciones.style.background_gradient(cmap='coolwarm', axis=None)
fig1 = px.imshow(correlaciones, color_continuous_scale='Portland')
fig1.update_layout(
    title='Heatmaps de correlacion de nuestro dataset por metodo Pearson',
    font={'family': 'verdana', 'size': 16, 'color': 'white'},
    template='plotly_dark',
    height=750,
    width=900,
)
fig1.show()
# Correlation of every column with weight_kg, strongest first.
con_peso = correlaciones[['weight_kg']].sort_values(by='weight_kg', ascending=False)
con_peso.style.background_gradient(cmap='coolwarm', axis=None)
# Violin plot (with inner box and all points) of weight_kg per primary type.
fig1 = px.violin(
    df,
    x='type_1',
    y='weight_kg',
    color='type_1',
    color_discrete_sequence=px.colors.qualitative.Light24,
    box=True,
    points="all",
    hover_data=df.columns,
)
# NOTE(review): the title text contains a typo ("determionar"); it is kept
# unchanged here because it is user-visible output.
fig1.update_layout(
    title='Grafica para determionar efecto de la variable peso en el tipo de pokemon',
    font={'family': 'verdana', 'size': 16, 'color': 'white'},
    template='plotly_dark',
    height=500,
    width=900,
    legend_bgcolor='rgb(117, 112, 179)',
)
fig1.show()