import pandas as pd
# --- Building a DataFrame from in-memory data ---

# Column-oriented input: each key is a column name and each value is the
# list of entries for that column.
football_dict = dict(
    player=["Lionel Messi", "Cristiano Ronaldo"],
    year=[2016, 2016],
    goals=[37, 25],
)
football_stats = pd.DataFrame(data=football_dict)
print(football_stats)

# Row-oriented input: one dict per row, keyed by column name.
# This rebinds football_stats to a frame holding the same data.
football_list = [
    dict(player="Lionel Messi", year=2016, goals=37),
    dict(player="Cristiano Ronaldo", year=2016, goals=25),
]
football_stats = pd.DataFrame(data=football_list)
print(football_stats)
# --- Loading data from CSV ---

# Default load: no index column is requested, so pandas assigns its
# default integer index.
df = pd.read_csv('/work/datasets/football_mini_stats.csv')
print(df)

# Same file again, this time using the 'player' column as the row index.
df = pd.read_csv('/work/datasets/football_mini_stats.csv', index_col='player')
print(df)

# Full dataset, indexed by 'player_id'. The statements below in this
# section all operate on this frame.
df = pd.read_csv('/work/datasets/football_stats.csv', index_col='player_id')
# Bare expressions are only displayed in a notebook cell; in a script they
# are evaluated and thrown away, so every result here is printed explicitly.
print(df)

# --- Quick inspection ---
print(df.sample(3))  # three randomly chosen rows
print(df.shape)      # (rows, columns)
print(df.size)       # total number of cells
# DataFrame.info() writes its report to stdout itself and returns None, so
# it is called on its own: wrapping it in print() would append a stray
# "None" line after the report.
df.info()
print(df.describe())  # summary statistics

# --- Sorting ---
# sort_values returns a new frame; df itself is left untouched.
print(df.sort_values('player_name', ascending=False))
# Teams A-Z and, within each team, highest goal count first.
print(df.sort_values(['team_name', 'goals'], ascending=[True, False]).head(3))
print(df['team_name'].unique())

# --- Filtering rows: three equivalent spellings of the same selection ---
print(df[df.player_name == 'Aaron Connolly'])      # attribute access
print(df[df['player_name'] == 'Aaron Connolly'])   # bracket access
print(df.query('player_name == "Aaron Connolly"'))  # query expression
# Cristiano Ronaldo's goals in 2014
cols = ['player_name', 'year', 'goals']
is_ronaldo = df['player_name'] == 'Cristiano Ronaldo'
in_2014 = df['year'] == 2014
# cr7 keeps every column of the matching rows; only the printout is
# narrowed to the columns of interest.
cr7 = df[is_ronaldo & in_2014]
print(cr7[cols])

# Goals by Ronaldo, Messi and Suárez in 2015
players = ['Cristiano Ronaldo', 'Lionel Messi', 'Luis Suárez']
is_listed_player = df['player_name'].isin(players)
in_2015 = df['year'] == 2015
top_players = df[is_listed_player & in_2015]
print(top_players[cols])
# Goals by Ronaldo, Messi and Suárez in 2015, this time via label lookup.
# The current index is moved back into a regular column and 'player_name'
# becomes the row index, so players can be selected by name with .loc.
df = df.reset_index().set_index('player_name')

cols = ['year', 'goals']
players = ['Cristiano Ronaldo', 'Lionel Messi', 'Luis Suárez']
# Select the rows labelled with those names (all years), then keep 2015.
top_players = df.loc[players, cols]
print(top_players.query('year==2015'))
# Highest number of goals scored by a player in a single season
# (assumes each row is one player-season -- TODO confirm against the dataset).
cols = ['goals', 'year', 'team_name']
max_goals = df['goals'].max()
holds_record = df['goals'] == max_goals
# Every row that reaches the maximum is shown, so ties all appear.
print(df.loc[holds_record, cols])

# Percentage of shots that end in a goal, over the whole dataset
total_goals = df['goals'].sum()
total_shots = df['shots'].sum()
conversion_pct = total_goals / total_shots * 100
print(round(conversion_pct, 2))
# Top 10 players by total goals scored
goals_by_player = df.groupby('player_name')['goals']
top_players = goals_by_player.sum()
ranked_scorers = top_players.sort_values(ascending=False)
print(ranked_scorers.head(10))

# Top 5 teams by cards received. Both totals are shown, but the ranking
# itself is driven by the red-card total only.
card_columns = ['red_cards', 'yellow_cards']
red_cards = df.groupby('team_name')[card_columns].sum()
ranked_teams = red_cards.sort_values('red_cards', ascending=False)
print(ranked_teams.head(5))

# Top 5 players by combined goals and assists. The per-row sum is stored
# as a new 'goals_assists' column on df before aggregating per player.
df['goals_assists'] = df['goals'] + df['assists']
contributions = df.groupby('player_name')['goals_assists']
best_players = contributions.sum()
ranked_contributors = best_players.sort_values(ascending=False)
print(ranked_contributors.head(5))

# Your own awesome code goes here. You can do it!