import logging
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
from wordcloud import WordCloud
import session_info
# Root logger setup for the whole analysis: print only the message text (no
# level or timestamp prefix) for records at INFO and above.
# NOTE: per the logging docs, `encoding` is only applied when `filename` is
# also given; it is accepted here but has no effect on the default stream
# handler, and the keyword itself requires Python 3.9+.
logging.basicConfig(level=logging.INFO, format="%(message)s", encoding="utf-8")
Conteo de variables
# Load the tournament records; the workbook path is relative, so it is
# resolved against the current working directory.
april_23 = pd.read_excel("kcapril23.xlsx")

# Headline figures: the row count is reported as the number of duelists, and
# the number of distinct values in the "Deck" column as the number of decks.
# The column is still capitalised here because headers have not been
# lower-cased yet.
total_duelists = april_23.shape[0]
distinct_decks = april_23["Deck"].nunique()
logging.info(f"En total hubieron {total_duelists} duelistas con {distinct_decks} mazos distintos")

# Bare expression: shows five random rows when run as a notebook cell.
# Raises if the sheet has fewer than five rows.
april_23.sample(5)
Limpieza y Manipulación de Datos
# Lower-case every header so the rest of the analysis can refer to columns
# such as 'deck', 'skill' and 'day' without worrying about capitalisation.
logging.info("Correción de poner todo en minúscula las columnas")
lowercase_headers = april_23.columns.str.lower()
april_23.columns = lowercase_headers
# Bare expression: displays the renamed headers when run as a notebook cell.
april_23.columns

# Data fix: one skill name was entered with a mixed-language typo
# ("Heros"); map it to the correct spelling so both variants are counted as
# the same skill. Only exact, whole-cell matches are replaced.
skill_corrections = {"Alianza de Heros": "Alianza de Héroes"}
april_23["skill"] = april_23["skill"].replace(skill_corrections)
logging.info("Encontré una fe de errata en las skills mal tipeo de Heros mezclando idiomas")
Registro de la comunidad
# Bar chart: how many duelists reported their result to each community server.
logging.info('Gráfico de cuántos usuarios reportaron sus ndmax a los servers')

# Columns at positions 5..9 are cast to int and summed per column.
# NOTE(review): presumably these are one flag column per server - the
# selection is positional, so confirm it against the workbook layout.
servers = april_23.iloc[:, 5:10].astype('int').sum()

# Build the plotting frame straight from the Series (index -> server name,
# values -> number of duelists) instead of round-tripping through a dict.
comunidad = pd.DataFrame(
    {
        'Servers': servers.index,
        'Duelistas': servers.to_numpy(),
    }
)
# Largest community first; `ascending` is documented as a bool.
comunidad = comunidad.sort_values('Duelistas', ascending=False)

fig = px.bar(
    comunidad,
    x="Servers",
    y='Duelistas',
    title="Registro de decks con su comunidad",
    text="Duelistas",
    color="Servers",
    template="ggplot2",
)
# The x axis already names every server, so the colour legend is redundant.
fig.update_layout(showlegend=False)
fig.show()
Usuarios por día
# Line chart: number of rows (duelists) recorded for each value of 'day'.
rows_by_day = april_23.groupby("day")["day"]
dias = rows_by_day.count().reset_index(name="users")

fig = px.line(
    dias,
    x="day",
    y="users",
    text="users",
    title="Usuarios que llegan a NDmax por día",
    template="ggplot2",
)
# Put each point's value label just above the marker.
fig.update_traces(textposition="top center")
fig.show()
NDmax Decks
# Build the data to compare the five most-played decks against all the rest.
# One row per deck with its number of duelists, most-played first.
deck_count = (
    april_23.groupby("deck")["deck"]
    .count()
    .reset_index(name="total")
    .sort_values("total", ascending=False)  # `ascending` is documented as a bool
)

# Explicit positional slicing: the index is shuffled after sorting, so use
# .iloc rather than relying on how plain [0:5] is interpreted.
top_five_rows = deck_count.iloc[:5]
remaining_rows = deck_count.iloc[5:]
user_topfive = top_five_rows["total"].sum()
other_user_decks = remaining_rows["total"].sum()

# Pie chart of the two groups; plain ints are passed to matplotlib.
count_groupby_decks = [int(user_topfive), int(other_user_decks)]
labels = "Top 5 mazos \n más usados", "Resto\n de mazos"
plt.pie(count_groupby_decks, labels=labels, autopct="%1.0f%%")
plt.title("Comparativa Top 5 vs Resto de mazos")
plt.show()

# Log the names of the five leading decks (one per line) and their combined
# number of duelists.
list_topfive = '\n '.join(top_five_rows['deck'].tolist())
logging.info(f'Los 5 mazos más utilizados por la comunidad son: \n {list_topfive}\nSuman en conjunto {user_topfive} usuarios.')
# Horizontal stacked bar chart: for five selected decks, how many duelists
# used each skill.
# One row per (deck, skill) pair with its number of duelists, largest first.
resume = (
    april_23.groupby(['deck', 'skill'])['skill']
    .size()
    .reset_index(name='total')
    .sort_values('total', ascending=False)  # `ascending` is documented as a bool
)

# NOTE(review): the five deck names are hard-coded; presumably they are the
# five most-played decks in this dataset - verify against `deck_count` if the
# input data changes.
deck_names = resume['deck']
pecas = resume[deck_names == 'Phantom Knights']
hero = resume[deck_names == 'Elemental Heros']
resonator = resume[deck_names == 'Resonators']
infernoid = resume[deck_names == 'Infernoid']
solfachord = resume[deck_names == 'Solfachord']

# The concatenation order is the order in which decks are handed to plotly,
# so it is kept exactly as written.
topfive = pd.concat([infernoid, solfachord, resonator, hero, pecas], ignore_index=True)

fig = px.bar(
    topfive,
    x="total",
    y='deck',
    color="skill",
    text="total",
    orientation='h',
    title="Top Decks NDmax KC CUP Abril 2023",
    template="ggplot2",
)
fig.update_layout(showlegend=False)
fig.show()
# Word cloud of every deck other than the five charted separately.
# NOTE(review): this hard-coded list repeats the deck names used for the
# top-five bar chart; keep the two in sync.
excluded_decks = [
    "Phantom Knights",
    "Elemental Heros",
    "Resonators",
    "Infernoid",
    "Solfachord",
]
decks = april_23[~april_23['deck'].isin(excluded_decks)].copy()

# Strip hyphens, slashes and spaces from all string cells in a single regex
# pass, so each deck name becomes one token. This gives the same result as
# replacing "-" and "/" with a space and then deleting every space.
decks = decks.replace(r"[-/ ]", '', regex=True)

# Only the 'deck' column feeds the cloud, so fill missing values there rather
# than across the whole frame.
text_decks = ' '.join(decks['deck'].fillna('').tolist())

# NOTE(review): WordCloud's default `collocations=True` may merge adjacent
# deck names into two-word entries; confirm whether that is wanted.
wc = WordCloud(
    width=2560,
    height=1440,
    background_color="mintcream",
    colormap="Dark2",
).generate(text_decks)

plt.axis("off")
plt.imshow(wc, interpolation="bilinear")
plt.show()
# Final cell: print the session report from the `session_info` package.
# NOTE(review): presumably this lists the versions of the imported packages
# for reproducibility - confirm against the package documentation.
session_info.show()