# Start writing code here...
-- Return every column of every row from the best-selling Switch games CSV file.
SELECT *
FROM 'best_selling_switch_games.csv'
Librerías importadas
# pandas is imported first because the dataset is loaded on the very next line.
import pandas as pd
# Read the CSV file into the DataFrame that every later step works on.
df = pd.read_csv('best_selling_switch_games.csv')
import numpy as np
# NOTE(review): pandas is imported a second time here; harmless but redundant.
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import seaborn as sns
import datetime as dt
# Print a summary of the DataFrame (columns, dtypes, non-null counts).
df.info()
Estadísticos descriptivos
# Arithmetic mean of the 'copies_sold' column.
# NOTE(review): both lines are bare expressions; their values are only shown
# when evaluated interactively (e.g. as the last expression of a notebook cell).
df['copies_sold'].mean()
# Median of the 'copies_sold' column.
df['copies_sold'].median()
Top 10 juegos con mayores ventas
# Ten rows with the highest 'copies_sold', keeping only title and sales.
top_10_games = df.sort_values('copies_sold', ascending=False)[['title', 'copies_sold']].head(10)

# Bar chart of those ten titles; sales are scaled to millions of copies.
fig, ax = plt.subplots(figsize=(10, 10))
ax.bar(top_10_games['title'], top_10_games['copies_sold'] / 1000000)
ax.set_title('Top 10 Best-Selling Games', fontsize=15)
ax.set_ylabel('Millions')
# Tilt the title labels so long game names do not overlap.
plt.xticks(rotation=30, ha='right')
plt.show()
# Genre and sales of the five rows with the highest 'copies_sold'.
# NOTE(review): this ranks individual rows, not sales aggregated per genre —
# confirm that is the intended meaning of "top 5 genres".
top_5_games = df.sort_values('copies_sold', ascending=False)[['genre', 'copies_sold']].head(5)
Top 5 Géneros con mayores ventas
# Bar chart of the rows in top_5_games: one bar per genre, each labelled with
# its sales figure in millions of copies.
palette = ["#9b59b6", "#3498db", "#95a5a6", "#e74c3c", "#34495e"]
color_dict = {
    "Kart racing": palette[0],
    "Social simulation": palette[1],
    "Fighting": palette[2],
    "Action-adventure": palette[3],
    "Role-playing": palette[4],
}
# Genres missing from color_dict fall back to a neutral grey instead of
# aborting the plot with a KeyError.
colors = [color_dict.get(genre, "#7f8c8d") for genre in top_5_games['genre']]
sales_millions = top_5_games['copies_sold'] / 1000000

plt.figure(figsize=(10, 10))
barlist = plt.bar(top_5_games['genre'], sales_millions, color=colors)
# Pair each bar with its value by position. top_5_games still carries the row
# labels of df after sorting, so a label-based lookup such as series[i] is not
# guaranteed to address the i-th bar.
for bar, value in zip(barlist, sales_millions):
    plt.annotate(
        "{:.2f}M".format(value),
        xy=(bar.get_x() + bar.get_width() / 2, bar.get_height()),
        xytext=(0, 3),
        textcoords="offset points",
        ha='center',
        va='bottom',
        rotation=0,
    )
plt.ylabel('Millions')
plt.xticks(rotation=30, ha='right')
plt.show()
Evolución de las ventas por año (periodo 2018-2022)
# Derive a 'year' column from the 'as_of' date and total the numeric columns
# per year.
df['as_of'] = pd.to_datetime(df['as_of'])
# The year is stored as a string so it can serve as a categorical bar label.
df['year'] = df['as_of'].dt.year.astype('str')
# numeric_only=True restricts the sum to numeric columns. df also holds the
# datetime 'as_of' column and text columns such as 'title', and pandas 2.x
# raises TypeError when asked to sum a datetime column.
year_df = df.groupby('year').sum(numeric_only=True).reset_index()
# Bare expression: displays the table when run as the last line of a cell.
year_df
# Bar chart of total copies sold per year (from year_df), each bar labelled
# with its value in millions of copies.
palette = ["#9b59b6", "#3498db", "#95a5a6", "#e74c3c", "#34495e"]
color_dict = {
    '2018': palette[0],
    '2019': palette[1],
    '2020': palette[2],
    '2021': palette[3],
    '2022': palette[4],
}
# Years outside 2018-2022 fall back to a neutral grey instead of aborting the
# plot with a KeyError.
colors = [color_dict.get(year, "#7f8c8d") for year in year_df['year']]
yearly_sales_millions = year_df['copies_sold'] / 1000000

plt.figure(figsize=(10, 10))
barlist = plt.bar(year_df['year'], yearly_sales_millions, color=colors)
# Pair bars and values by position so the labels do not depend on year_df's
# index labels.
for bar, value in zip(barlist, yearly_sales_millions):
    plt.annotate(
        "{:.2f}M".format(value),
        xy=(bar.get_x() + bar.get_width() / 2, bar.get_height()),
        xytext=(0, 3),
        textcoords="offset points",
        ha='center',
        va='bottom',
        rotation=0,
    )
plt.ylabel('Million')
plt.xlabel('Year')
plt.title('Number of Games Sold in 2018 - 2022', fontsize=15)
plt.show()