#importing used libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Read the video game sales CSV into the working DataFrame.
csv_path = 'vgsales-1.csv'
df = pd.read_csv(csv_path)
# First few rows, as a quick look at the columns and their values.
df.head()
# Summary statistics produced by pandas for the dataset.
df.describe()
# Column dtypes and non-null counts.
df.info()
# Output of df.info() (kept as a comment so the file stays valid Python):
# <class 'pandas.core.frame.DataFrame'>
# RangeIndex: 16598 entries, 0 to 16597
# Data columns (total 11 columns):
# # Column Non-Null Count Dtype
# --- ------ -------------- -----
# 0 Rank 16598 non-null int64
# 1 Name 16598 non-null object
# 2 Platform 16598 non-null object
# 3 Year 16327 non-null float64
# 4 Genre 16598 non-null object
# 5 Publisher 16540 non-null object
# 6 NA_Sales 16598 non-null float64
# 7 EU_Sales 16598 non-null float64
# 8 JP_Sales 16598 non-null float64
# 9 Other_Sales 16598 non-null float64
# 10 Global_Sales 16598 non-null float64
# dtypes: float64(6), int64(1), object(4)
# memory usage: 1.4+ MB
#Your solution
# Horizontal bar chart of how many games fall into each genre.
fig, ax = plt.subplots(figsize=(10, 6))
genre_totals = df['Genre'].value_counts()
# Ascending order, so the bar lengths grow from the bottom of the chart upward.
gen_count = genre_totals.sort_values(ascending=True)
gen_count.plot.barh(ax=ax);
ax.set_xlabel("Number count")
ax.set_ylabel("Game Genre")
# Your solution
# Count the games released in each year and show the counts as a bar chart.
# NOTE(review): dropna() without `subset` also removes rows whose only missing
# value is in a column other than Year (the df.info() output shows nulls in
# Publisher as well) -- confirm that excluding those games is intended.
df.dropna(inplace=True)
# Year is read as float because of the missing values; once those rows are
# gone it can be stored as an integer so the axis shows 2006 instead of 2006.0.
df['Year'] = df['Year'].astype(int)
year_game = df.groupby('Year')['Name'].count()
year_game
# Series.plot draws on the current axes when a figure is already open, so give
# this chart its own figure/axes instead of relying on the implicit state.
# (`x=` has no meaning for a Series plot -- the index is always the x axis.)
fig, ax = plt.subplots(figsize=(12, 6))
year_game.plot(kind='bar', ax=ax)
ax.set_ylabel("Games released")
ax.set_xlabel("Year")
# Your solution
# Find the three years with the most releases, then break those years down by
# genre and finally show their total release counts.
newcount = df['Year'].value_counts()
newcount
top_3year_release = newcount.sort_values(ascending=False).head(3)
top_3year_release

# Restrict the data to the three busiest years; plotting the full DataFrame
# here would show every year, which contradicts the chart title.
top_years_df = df[df['Year'].isin(top_3year_release.index)]
plt.figure(figsize=(18, 6))
sns.countplot(data=top_years_df, x='Year', hue='Genre')
plt.xticks(size=20, rotation=0)
plt.title("Top 3 years game releases by genre", fontsize=20)
plt.show()

# Total releases for the same three years. An explicit axes keeps this bar
# chart from being drawn onto a figure that is still open.
fig, ax = plt.subplots(figsize=(12, 6))
top_3year_release.plot(kind='bar', ax=ax)
# original line chart provided
# One line per sales column, all plotted against genre on a shared axes.
plt.figure(figsize=(15, 8))
# Attach the label to each line when it is drawn. Passing a bare list of labels
# to plt.legend() assigns them by artist order, and every seaborn line also
# adds a confidence-band artist, so the labels can end up on the wrong series.
for sales_column, region_label in [
    ("JP_Sales", "JP"),
    ("NA_Sales", "NA"),
    ("EU_Sales", "EU"),
    ("Global_Sales", "Global"),
]:
    sns.lineplot(x=df["Genre"], y=df[sales_column], label=region_label)
plt.title("Sales by Genre per Region", size=15)
# Each lineplot call sets the y label to its own column name; override it once
# all the lines are drawn.
plt.ylabel("Sales")
plt.legend()
plt.show()
# Your solution relplot
# Total sales per genre for every sales column, drawn as one line per column.
dfNew = df.drop(columns=['Rank', 'Year'])
ord_genre = ['Sports', 'Platform', 'Racing', 'Role-Playing', 'Puzzle', 'Misc', 'Shooter', 'Simulation','Action','Adventure', 'Strategy']
# Aggregate only the numeric columns: summing the text columns (Name, Platform,
# Publisher) is meaningless and they are never plotted. The string 'sum' is
# used instead of the builtin, which pandas deprecates as an aggfunc callable.
sales_cols = dfNew.select_dtypes(include='number').columns.tolist()
df_pivot = pd.pivot_table(dfNew, index=['Genre'], values=sales_cols, aggfunc='sum', fill_value=0)
# Apply the preferred genre order, then append any genre that is not in the
# hard-coded list so that no genre is silently dropped from the chart.
listed_genres = [genre for genre in ord_genre if genre in df_pivot.index]
unlisted_genres = [genre for genre in df_pivot.index if genre not in ord_genre]
df_pivot = df_pivot.reindex(listed_genres + unlisted_genres)
# DataFrame.plot opens its own figure, so the size has to be passed to it; a
# preceding plt.figure(figsize=...) would only leave an empty figure behind.
df_pivot.plot(figsize=(18, 6))