#importing used libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Read vgsales.csv (relative to the working directory) into a DataFrame.
df = pd.read_csv(filepath_or_buffer='vgsales.csv')
# Peek at the first five rows.
df.head(n=5)
# Summary statistics of the dataset's columns.
df.describe()
# Per-column dtype and non-null count overview.
df.info()
# Genre distribution: count the rows per genre and sort the counts in
# ascending order, so that the most frequent genre is drawn as the top bar
# of the horizontal bar chart.
gen_count = df['Genre'].value_counts().sort_values()
(
    gen_count
    .plot(kind="barh", figsize=(10, 6), title="Distribution of Genre")
    .set(xlabel="Count", ylabel="Genre")
);
# Data cleaning: discard every row that has a missing value in any column.
# This modifies df in place rather than creating a new DataFrame.
df.dropna(axis=0, how="any", inplace=True)
# With the missing values gone, Year can be cast to an integer dtype.
df["Year"] = df["Year"].astype(int)
# Check the resulting column types.
df.dtypes
# Number of games released per year: count the non-null `Name` entries in
# each `Year` group, then reset the index so `Year` becomes a regular column
# again.
year_game = (
    df.groupby("Year")["Name"]
    .count()
    .reset_index()
)
year_game
# Bar chart of the yearly counts, with the years along the x axis.
year_game.plot.bar(x="Year", figsize=(12, 6), title="Number of Games released per year");
# The three years with the most releases. value_counts() orders the years by
# descending count, so the first three index labels are the top three years.
top_3year_release = df["Year"].value_counts().head(3).index
top_3year_release
# Plot the findings: one group of bars per top year, with the genre encoded
# in the hue.
plt.figure(figsize=(18, 6))
sns.countplot(data=df, x="Year", hue="Genre", order=top_3year_release, palette="mako")
plt.xticks(rotation=0, size=25)
plt.title(label="Top 3 years game releases by genre", fontsize=20)
plt.show()
# Original line chart provided: sales per genre, one line per sales region.
# seaborn aggregates the repeated genre values (mean by default) and draws a
# confidence band around each line.
plt.figure(figsize=(15, 8))
# Label each line as it is drawn instead of passing a bare list of names to
# plt.legend(). A positional label list is matched against every artist on
# the axes in order, and because each lineplot also adds a confidence band,
# the names can end up attached to the bands rather than to the four lines
# (depending on the matplotlib version). Explicit labels keep each name tied
# to its own line.
for sales_column, region_label in (
    ("JP_Sales", "JP"),
    ("NA_Sales", "NA"),
    ("EU_Sales", "EU"),
    ("Global_Sales", "Global"),
):
    sns.lineplot(x=df["Genre"], y=df[sales_column], label=region_label)
plt.title("Sales by Genre per Region", size=15)
plt.ylabel("Sales")
# Build the legend from the labelled lines.
plt.legend()
plt.show()
# Alternative solution using point plots: one series per sales column, each
# drawn in its own fixed colour.
from matplotlib.lines import Line2D

# Sales column -> colour used both for its point plot and its legend entry.
series_colors = {
    'JP_Sales': 'pink',
    'NA_Sales': 'blue',
    'EU_Sales': 'green',
    'Global_Sales': 'magenta',
}

plt.figure(figsize=(15, 8))
for sales_col, shade in series_colors.items():
    sns.pointplot(x=df['Genre'], y=df[sales_col], color=shade)

# The point plots are drawn without labels, so the legend is assembled from
# empty proxy lines that carry only the colour and the column name.
legend_handles = [
    Line2D([], [], color=shade, label=sales_col)
    for sales_col, shade in series_colors.items()
]
plt.legend(handles=legend_handles)
plt.title("Sales by Genre per Region", size=15)
plt.ylabel("Sales")
plt.show()