import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import seaborn as sns
from wordcloud import WordCloud
# Load the three FIFA World Cup tables. The tournament and match files are
# read with ';' as the separator, the player file with ','.
world_cups = pd.read_csv("WorldCups.csv", sep=";")
world_cup_players = pd.read_csv("WorldCupPlayers.csv", sep=",")
# Malformed rows in the match file are skipped instead of raising.
world_cup_matches = pd.read_csv(
    "WorldCupMatches.csv",
    sep=";",
    on_bad_lines="skip",
)

# Preview the first three rows of each table. These bare expressions only
# display something in an interactive/notebook session.
world_cups.head(3)
world_cup_players.head(3)
world_cup_matches.head(3)
# Bar chart: number of titles per team, taken from the 'Winner' column.
winner = world_cups['Winner'].value_counts()
fig = plt.figure()
winner.plot.bar(
    title="Champion of FIFA World Cup",
    fontsize=14,
    figsize=(12, 10),
)
plt.xlabel('Teams')
plt.ylabel('Number of Winnings')
plt.show()
# Grouped bar chart: how often each team finished in each of the top four
# positions.
all_pos = world_cups[['Winner', 'Runners-Up', 'Third', 'Fourth']]
# Count team occurrences column by column. The Series method is used because
# the top-level `pd.value_counts` is deprecated in recent pandas releases.
# A team that never appears in a given column comes out as NaN, so fill those
# with 0 before casting the counts to int.
all_positions = all_pos.apply(pd.Series.value_counts).fillna(0).astype(int)
all_positions.plot(
    y=['Winner', 'Runners-Up', 'Third', 'Fourth'],
    kind='bar',
    fontsize=12,
    figsize=(18, 8),
)
plt.xlabel('Teams')
plt.ylabel('Number of winnings')
plt.show()
# Line plot: total goals scored per tournament year.
goal = world_cups[['Year', 'GoalsScored']]
plt.figure(figsize=(16, 8))
plt.plot(
    goal['Year'],
    goal['GoalsScored'],
    '-p',  # solid line with pentagon markers
    color='gray',
    linewidth=6,
    markersize=15,
    markerfacecolor='white',
    markeredgecolor='gray',
)
# Fixed axis windows for the plot.
plt.xlim(1930, 2018)
plt.ylim(60, 180)
plt.xlabel('Year', fontstyle='italic')
plt.ylabel('GoalScored')
plt.show()
# Attendance of the crowd per World Cup, 1930 to 2014.
# Strip every '.' from the attendance strings so they can be cast to int
# (presumably the dots are thousands separators -- confirm against the CSV).
# `regex=False` makes '.' a literal dot rather than a regex wildcard.
world_cups['Attendance'] = world_cups['Attendance'].str.replace('.', '', regex=False)
Crowd = world_cups[['Year', 'Attendance']]
Crowd
plt.figure(figsize=(15, 8))
# x/y are passed by keyword: recent seaborn releases no longer accept them
# as positional arguments.
sns.barplot(x=Crowd['Year'].astype(int), y=Crowd['Attendance'].astype(int))
plt.ylabel('Attendance')
plt.xlabel('Year')
plt.title('Crowd present at stadium in year 1930 to 2014')
plt.show()
# Average attendance of the crowd in World Cup matches, 1930 to 2014.
# Group the match table by 'Year' and take the mean of 'Attendance'.
# Conceptually this is the crowd per match, i.e. roughly
# world_cups['Attendance'] / world_cups['MatchesPlayed'].
attendance_by_year = world_cup_matches.groupby('Year')['Attendance']
avg_att = attendance_by_year.mean().reset_index()

plt.figure(figsize=(15, 8))
plt.plot(
    avg_att['Year'],
    avg_att['Attendance'],
    '-p',
    color='gray',
    linewidth=4,
    markersize=12,
)
plt.title('Average attendance of crowd in WorldCup Matches')
plt.xlabel('Year')
plt.ylabel('Attendance')
plt.grid(True)
plt.show()
# Half-time goals of home and away teams, summed per tournament year and
# shown as a stacked bar chart.
# Multiple columns must be selected from a groupby with a list; the old
# tuple form `[...]['a', 'b']` is no longer accepted by current pandas.
Half_time = (
    world_cup_matches
    .groupby('Year')[['Half-time Home Goals', 'Half-time Away Goals']]
    .sum()
    .reset_index()
    .astype(int)
)
# Away goals first so they form the bottom segment of each stacked bar.
df = Half_time[['Half-time Away Goals', 'Half-time Home Goals']]
df.plot(kind='bar', stacked=True, figsize=(18, 15))
plt.title('Half time goals of home and away team')
# One tick per grouped year; deriving the count from the data avoids a
# tick/label length mismatch if the number of tournaments is not 20.
r = range(len(Half_time))
plt.xticks(r, Half_time['Year'])
plt.xlabel('Year')
plt.show()
# Home team: distribution of goals per match for each tournament year.
home_team1 = world_cup_matches[['Year', 'Home Team Goals']]
home_team1.head(840)
# Only the first 840 rows are kept before the int cast -- presumably to stay
# clear of unusable trailing rows; TODO confirm against the CSV.
home_team = home_team1.head(840).astype(int)

plt.figure(figsize=(15, 10))
sns.violinplot(data=home_team, x='Year', y='Home Team Goals', palette='Blues')
plt.grid(True, color='grey', alpha=0.3)
plt.title("Home Team Goals")
plt.show()
# Away team: distribution of goals per match for each tournament year.
# Only the first 840 rows are kept before the int cast -- presumably to stay
# clear of unusable trailing rows; TODO confirm against the CSV.
away_goal_columns = world_cup_matches[['Year', 'Away Team Goals']]
away_team = away_goal_columns.head(840).astype(int)

plt.figure(figsize=(15, 10))
sns.violinplot(data=away_team, x='Year', y='Away Team Goals', palette='Blues')
plt.grid(True, color='grey', alpha=0.3)
plt.title("Away Team Goals")
plt.show()
# Word cloud built from the 'Home Team Name' column (first 840 match rows).
home_names = world_cup_matches['Home Team Name'].head(840)
# WordCloud works on a single text, so the names are joined with spaces.
home_names_text = ' '.join(home_names)
wordcloud = WordCloud().generate(home_names_text)

plt.figure(figsize=(15, 10))
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis('off')
plt.show()
# Word cloud built from the 'Away Team Name' column (first 840 match rows).
away_names = world_cup_matches['Away Team Name'].head(840)
# WordCloud works on a single text, so the names are joined with spaces.
away_names_text = ' '.join(away_names)
wordcloud = WordCloud().generate(away_names_text)

plt.figure(figsize=(15, 10))
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis('off')
plt.show()