import pandas as pd
from pandas import Series
import seaborn as sns
import numpy as np
from typing import Dict
# Load the two input tables. The code below reads one column per hole
# (W1..W9, E1..E9, S1..S9) from `scores`, and the "Hole", "Par" and
# "Stroke Index" columns from `course`.
scores = pd.read_csv(filepath_or_buffer="./Golf - Scores.csv")
course = pd.read_csv(filepath_or_buffer="./Golf - Course.csv")

# Apply seaborn's default theme to every plot produced below.
sns.set_theme()
# Hole identifiers for the three nine-hole courses: a course letter followed
# by the hole number 1-9. These are also the per-hole column names read from
# `scores`.
hole_numbers = range(1, 10)
west_course = [f"W{i}" for i in hole_numbers]
east_course = [f"E{i}" for i in hole_numbers]
south_course = [f"S{i}" for i in hole_numbers]

# Every hole, ordered west, then east, then south.
all_courses = west_course + east_course + south_course
def get_hole(hole: str) -> Dict:
    """Return the course data for one hole as a ``{column: value}`` dict.

    Looks up the rows of the module-level ``course`` table whose "Hole"
    column equals ``hole`` and returns the first match.

    Raises:
        IndexError: if no row of ``course`` matches ``hole``.
    """
    matching_rows = course.loc[course["Hole"] == hole]
    # to_dict("index") yields {row_label: {column: value}}; only the first
    # matching row is of interest.
    records = matching_rows.to_dict("index")
    return list(records.values())[0]
def get_courses_played(round: Series) -> str:
    """Return the letters of the nine-hole courses played in ``round``.

    A course counts as played when the score recorded for its first hole is
    greater than zero. Letters are emitted in the fixed order "E", "W", "S",
    so a round on the west and east courses yields "EW".

    Args:
        round: one row of the scores table, indexed by hole name.

    Returns:
        A string made of the letters of the courses played (possibly empty).
    """
    # Read from the `round` argument. The previous body ignored it and used a
    # global named `row`, which only worked when the caller's loop variable
    # happened to have that name.
    first_holes = (
        ("E", east_course[0]),
        ("W", west_course[0]),
        ("S", south_course[0]),
    )
    return "".join(
        letter for letter, first_hole in first_holes if round[first_hole] > 0
    )
# For every round: record which courses were played, the difference to par on
# each hole, and the round totals (score, par, and score minus par).
for i, row in scores.iterrows():
    scores.at[i, "courses"] = get_courses_played(row)

    total_score = 0
    total_par = 0
    for h in all_courses:
        score = row[h]
        par_diff_column = f"{h}_par_diff"

        if not score > 0:
            # No positive score recorded for this hole: leave the difference
            # empty and keep the hole out of the totals.
            scores.at[i, par_diff_column] = np.nan
            continue

        hole = get_hole(h)
        par = hole["Par"]
        total_par += par
        total_score += score
        scores.at[i, par_diff_column] = score - par

    scores.at[i, "total_score"] = total_score
    scores.at[i, "total_par"] = total_par
    scores.at[i, "score_diff_par"] = total_score - total_par
# Total score per round against the row index, with a fitted regression line.
indexed_scores = scores.reset_index()
ax = sns.lmplot(
    data=indexed_scores,
    x="index",
    y="total_score",
    height=6,
    aspect=3,
)
ax.set(ylabel="Score")

sns.set(rc={"figure.figsize": (20, 10)})

# Same plot, with one regression line per combination of courses played.
ax = sns.lmplot(
    data=indexed_scores,
    x="index",
    y="total_score",
    hue="courses",
    height=6,
    aspect=3,
)
ax.set(ylabel="Score")
# Build a long-format table with one row per hole that has a positive score:
# hole name, score, stroke index, par, and score minus par ("net_par").
#
# Rows are collected in a list and turned into a DataFrame once at the end.
# DataFrame.append was removed in pandas 2.0, and calling it per row copies
# the whole frame every time.
hole_records = []
hole_cache = {}  # hole name -> course data, looked up once per hole
for i, row in scores.iterrows():
    for h in all_courses:
        score = row[h]
        if not score > 0:
            continue
        if h not in hole_cache:
            hole_cache[h] = get_hole(h)
        hole = hole_cache[h]
        net_par = score - hole["Par"]
        hole_records.append(
            {
                "hole": h,
                "score": score,
                "stroke_index": hole["Stroke Index"],
                "par": hole["Par"],
                "net_par": net_par,
            }
        )

# Passing `columns` keeps the expected layout even when no hole was played.
df_holes = pd.DataFrame(
    hole_records,
    columns=["hole", "score", "stroke_index", "par", "net_par"],
)
for column in ("net_par", "par", "score"):
    df_holes[column] = df_holes[column].astype(int)
# Distribution of score-minus-par, split by the par of the hole.
net_par_hist_options = dict(x="net_par", hue="par", multiple="dodge", binwidth=1)

# Raw counts.
sns.histplot(df_holes, **net_par_hist_options)
# Densities, normalised separately for each par value.
sns.histplot(df_holes, stat="density", common_norm=False, **net_par_hist_options)
# Count how often each (stroke index, net par) combination occurred and show
# the counts as a heatmap: stroke index on the rows, net par on the columns.
df_si = pd.pivot_table(
    df_holes,
    values="score",
    index=["stroke_index", "net_par"],
    aggfunc="count",
)
df_si = df_si.reset_index()
# DataFrame.pivot takes keyword arguments only from pandas 2.0 on; the
# keyword form also works on older versions.
sns.heatmap(df_si.pivot(index="stroke_index", columns="net_par", values="score"))
# Mean of the numeric columns for each stroke index, plotted as mean net par.
# The string column "hole" is dropped before aggregating: since pandas 2.0
# GroupBy.mean no longer skips non-numeric columns silently and raises instead.
df_mean_by_st_index = (
    df_holes.drop(columns="hole").groupby("stroke_index").mean().reset_index()
)
sns.barplot(data=df_mean_by_st_index, x="stroke_index", y="net_par", palette="Greens_d")
# Mean score, par and net par for each hole (the stroke index is kept as a
# grouping key so it is available for colouring the bars).
hole_keys = ["hole", "stroke_index"]
df_score_by_hole = df_holes.groupby(hole_keys).mean().reset_index()
sns.barplot(
    data=df_score_by_hole,
    x="hole",
    y="net_par",
    hue="stroke_index",
    dodge=False,
    palette="Greens_d",
)

# Holes with the lowest mean net par (notebook display expression).
df_score_by_hole.sort_values(by="net_par").head()
# Holes with the highest mean net par (notebook display expression).
df_score_by_hole.sort_values(by="net_par", ascending=False).head()
# Average round per course: the sum of the mean scores of that course's holes.
df_ave = df_holes.groupby("hole").mean().reset_index()
ave_hole_names = df_ave["hole"]

ave = df_ave.loc[ave_hole_names.isin(east_course), "score"].sum()
print(f"East - Average round: {ave}")

ave = df_ave.loc[ave_hole_names.isin(west_course), "score"].sum()
print(f"West - Average round: {ave}")

ave = df_ave.loc[ave_hole_names.isin(south_course), "score"].sum()
print(f"South - Average round: {ave}")
# Output:
# East - Average round: 44.88235294117647
# West - Average round: 44.421052631578945
# South - Average round: 45.0
# Consistency per hole, measured as the standard deviation of net par.
df_err = df_holes.groupby("hole").std().reset_index()
df_err["std"] = df_err["net_par"]

# Holes ordered from the smallest to the largest standard deviation.
con = df_err.sort_values(by="net_par").loc[:, ["hole", "std"]]
con_hole = con["hole"].iloc[0]
con.head()

# The five holes with the largest standard deviation, largest first.
l_con = con.sort_values(by="std", ascending=False).head()
l_con_hole = l_con["hole"].iloc[0]
l_con

sns.barplot(data=con, x="hole", y="std", palette="Greens_d")

# Compare the net-par distributions of the hole with the smallest and the
# hole with the largest standard deviation.
df_con = df_holes[df_holes["hole"].isin([con_hole, l_con_hole])]
sns.histplot(df_con, x="net_par", hue="hole", multiple="dodge", binwidth=1)
# Standard deviation of the numeric columns for each stroke index, plotted as
# the spread of net par.
# The string column "hole" is dropped before aggregating: since pandas 2.0
# GroupBy.std no longer skips non-numeric columns silently and raises instead.
df_si_con = (
    df_holes.drop(columns="hole").groupby("stroke_index").std().reset_index()
)
sns.barplot(data=df_si_con, y="net_par", x="stroke_index", palette="Greens_d")
# Column-wise minimum per hole (so "score" holds the best score recorded on
# that hole), shown course by course as notebook display expressions.
df_best = df_holes.groupby("hole").min().reset_index()
best_hole_names = df_best["hole"]

df_best[best_hole_names.isin(east_course)]
df_best[best_hole_names.isin(west_course)]
df_best[best_hole_names.isin(south_course)]