import pandas as pd
# Column-oriented input: every key names a column and its list holds that
# column's values, one entry per row.
football_dict = dict(
    player=["Lionel Messi", "Cristiano Ronaldo"],
    year=[2016, 2016],
    goals=[37, 25],
)
football_stats = pd.DataFrame(data=football_dict)
print(football_stats)
player year goals
0 Lionel Messi 2016 37
1 Cristiano Ronaldo 2016 25
# Row-oriented input: one dict per row, keyed by column name.
football_list = [
    dict(player="Lionel Messi", year=2016, goals=37),
    dict(player="Cristiano Ronaldo", year=2016, goals=25),
]
football_stats = pd.DataFrame(data=football_list)
print(football_stats)
player year goals
0 Lionel Messi 2016 37
1 Cristiano Ronaldo 2016 25
# Load the small sample file; with no index column given, rows get the
# default 0..n-1 integer index.
df = pd.read_csv(
    filepath_or_buffer='/work/datasets/football_mini_stats.csv',
)
print(df)
player year goals
0 Lionel Messi 2016 37
1 Cristiano Ronaldo 2016 25
# Load the same sample file again, this time promoting the 'player' column
# to the row index.
df = pd.read_csv(
    filepath_or_buffer='/work/datasets/football_mini_stats.csv',
    index_col='player',
)
print(df)
year goals
player
Lionel Messi 2016 37
Cristiano Ronaldo 2016 25
# Load the full statistics file, using the 'player_id' column as the row index.
df = pd.read_csv(
    filepath_or_buffer='/work/datasets/football_stats.csv',
    index_col='player_id',
)
# Bare expression: an interactive session would display it, a plain script
# evaluates and discards it.
df
# Three rows picked at random, so the printed rows differ from run to run.
random_rows = df.sample(n=3)
print(random_rows)
player_name year games goals npg shots assists \
player_id
768 Vurnon Anita 2014 19 0 0 8 0
647 Harry Kane 2020 11 8 6 42 10
2518 Marcos Llorente 2019 29 3 3 14 3
yellow_cards red_cards position team_name time
player_id
768 3 0 D M S Newcastle United 1291
647 1 0 F Tottenham 987
2518 4 0 F M S Atletico Madrid 1382
# Dimensions of the table as a (rows, columns) tuple.
dimensions = df.shape
print(dimensions)
(18633, 12)
# Total number of cells in the table (rows multiplied by columns).
cell_count = df.size
print(cell_count)
223596
# DataFrame.info() writes its report straight to stdout and returns None,
# so printing the return value appends a trailing "None" line to the report.
info_result = df.info()
print(info_result)
<class 'pandas.core.frame.DataFrame'>
Int64Index: 18633 entries, 8865 to 4363
Data columns (total 12 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 player_name 18633 non-null object
1 year 18633 non-null int64
2 games 18633 non-null int64
3 goals 18633 non-null int64
4 npg 18633 non-null int64
5 shots 18633 non-null int64
6 assists 18633 non-null int64
7 yellow_cards 18633 non-null int64
8 red_cards 18633 non-null int64
9 position 18633 non-null object
10 team_name 18633 non-null object
11 time 18633 non-null int64
dtypes: int64(9), object(3)
memory usage: 1.8+ MB
None
# The next two expressions are evaluated but not printed or stored, so a
# plain script discards them (an interactive session would display them).
df.describe()
df.sort_values(by='player_name', ascending=False)

# Sort by team name A-Z and, within each team, by goals from highest to lowest.
by_team_then_goals = df.sort_values(
    by=['team_name', 'goals'],
    ascending=[True, False],
)
print(by_team_then_goals.head(3))
player_name year games goals npg shots assists \
player_id
1125 Carlos Bacca 2015 38 18 16 77 2
2014 Jérémy Menez 2014 33 16 8 78 4
1125 Carlos Bacca 2016 32 13 9 56 3
yellow_cards red_cards position team_name time
player_id
1125 2 0 F S AC Milan 3179
2014 3 1 F M S AC Milan 2713
1125 3 0 F S AC Milan 2152
# Distinct team names. Like the three filters below, the result is neither
# printed nor stored, so a plain script discards it.
df['team_name'].unique()

# Three equivalent ways to select the rows for one player:
# 1) boolean mask built with attribute access,
df[df.player_name == 'Aaron Connolly']
# 2) boolean mask built with bracket access,
df[df['player_name'] == 'Aaron Connolly']
# 3) a query expression string.
df.query('player_name == "Aaron Connolly"')
# Goals scored by Cristiano Ronaldo in 2014
is_ronaldo = df['player_name'] == 'Cristiano Ronaldo'
in_2014 = df['year'] == 2014
cr7 = df[is_ronaldo & in_2014]
# Columns to display; later snippets reuse this list.
cols = ['player_name', 'year', 'goals']
print(cr7[cols])
player_name year goals
player_id
2371 Cristiano Ronaldo 2014 48
# Goals scored by Ronaldo, Messi and Suárez in 2015
players = ['Cristiano Ronaldo', 'Lionel Messi', 'Luis Suárez']
is_selected_player = df['player_name'].isin(players)
in_2015 = df['year'] == 2015
top_players = df[is_selected_player & in_2015]
print(top_players[cols])
player_name year goals
player_id
2098 Luis Suárez 2015 40
2097 Lionel Messi 2015 26
2371 Cristiano Ronaldo 2015 35
# Goals scored by Ronaldo, Messi and Suárez in 2015, this time looking the
# players up by index label instead of building a boolean mask.
# Move the current index back into a regular column, then index by name.
df = df.reset_index().set_index('player_name')
players = ['Cristiano Ronaldo', 'Lionel Messi', 'Luis Suárez']
cols = ['year', 'goals']
# Label-based selection: all rows for the listed players, only the listed columns.
top_players = df.loc[players, cols]
season_2015 = top_players.query('year==2015')
print(season_2015)
year goals
player_name
Cristiano Ronaldo 2015 35
Lionel Messi 2015 26
Luis Suárez 2015 40
# Highest number of goals scored by a player in a single season
max_goals = df['goals'].max()
cols = ['goals', 'year', 'team_name']
# Keep every row that reaches the maximum, so ties would all be listed.
record_rows = df[df['goals'] == max_goals]
print(record_rows[cols])
goals year team_name
player_name
Cristiano Ronaldo 48 2014 Real Madrid
# Percentage of shots that end in a goal, over the whole dataset
total_shots = df['shots'].sum()
total_goals = df['goals'].sum()
conversion_pct = total_goals / total_shots * 100
print(round(conversion_pct, 2))
10.6
# Top 10 players by total goals scored across all their rows
goals_by_player = df.groupby('player_name')['goals']
top_players = goals_by_player.sum()
ranking = top_players.sort_values(ascending=False)
print(ranking.head(10))
player_name
Lionel Messi 205
Cristiano Ronaldo 194
Robert Lewandowski 174
Luis Suárez 154
Harry Kane 148
Pierre-Emerick Aubameyang 141
Sergio Agüero 128
Edinson Cavani 125
Ciro Immobile 119
Mohamed Salah 117
Name: goals, dtype: int64
# Top 5 teams by cards received. The ranking uses the red-card total only;
# the yellow-card total is shown alongside it.
card_columns = ['red_cards', 'yellow_cards']
red_cards = df.groupby('team_name')[card_columns].sum()
ranked_by_red = red_cards.sort_values(by='red_cards', ascending=False)
print(ranked_by_red.head())
red_cards yellow_cards
team_name
AC Milan 43 513
Genoa 42 562
Valencia 39 598
Bologna 38 477
Lazio 35 576
# New column: goals plus assists for each row.
df['goals_assists'] = df['goals'] + df['assists']
# Sum that column per player and show the five highest totals.
best_players = df.groupby('player_name')['goals_assists'].sum()
ranked_players = best_players.sort_values(ascending=False)
print(ranked_players.head())
player_name
Lionel Messi 293
Cristiano Ronaldo 246
Luis Suárez 224
Robert Lewandowski 203
Harry Kane 178
Name: goals_assists, dtype: int64
# Your awesome code goes here. You can do it!