import pandas as pd
import sqlite3
import seaborn as sns
import matplotlib.pyplot as plt
db_path = '/work/basketball.sqlite'
con = sqlite3.connect(db_path) # create connection object to database
print("SQL database connected")
table = pd.read_sql_query("SELECT name FROM sqlite_master WHERE type='table'", con)
print(table)
game_df = pd.read_sql_query("SELECT * from game;", con)
game_df
year_df = pd.DataFrame(pd.DatetimeIndex(pd.to_datetime(game_df["GAME_DATE"])).year)
year_df = pd.DataFrame(year_df.GAME_DATE.value_counts()).reset_index(level=0).sort_values('index')
year_df.rename(columns={"index": "Year", "GAME_DATE": "Count"}, inplace=True)
year_df.plot("Year", "Count").set_title("Number of NBA Season Games Played vs. Year")
merge_df = game_df.copy()
merge_df["Year"] = pd.DataFrame(pd.DatetimeIndex(pd.to_datetime(game_df["GAME_DATE"])).year)
years = list(pd.DatetimeIndex(pd.to_datetime(game_df["GAME_DATE"])).year.unique())
teams_df = game_df["TEAM_ABBREVIATION_HOME"].unique()
num_teams = []
for year in years:
num_teams.append(len(merge_df[merge_df["Year"] == year]["TEAM_ABBREVIATION_HOME"].unique()))
pd.DataFrame({"Years": years, "Num Teams": num_teams}).plot("Years", "Num Teams").set_title("Number of NBA Teams vs. Year")
game_df["PF_HOME"].mean()
important_columns = ["GAME_ID", "LAST_GAME_HOME_TEAM_POINTS", "LAST_GAME_VISITOR_TEAM_POINTS", "WL_HOME"]
home_columns = ["OREB_HOME", 'OREB_HOME','DREB_HOME', 'REB_HOME', 'AST_HOME', 'STL_HOME', 'BLK_HOME', 'TOV_HOME', 'PF_HOME']
away_columns = [column.replace("HOME", "AWAY") for column in home_columns]
important_columns += home_columns + away_columns
important_columns
extraction_df = game_df[important_columns]
extraction_df.isna().sum()
extraction_df = extraction_df.dropna()
len(extraction_df)
wins = len(extraction_df[extraction_df.WL_HOME == 'W'])
losses = len(extraction_df[extraction_df.WL_HOME == 'L'])
home_won = (wins)/(wins + losses)
home_won
extraction_df.columns
blk_winner_home = extraction_df[extraction_df.WL_HOME == 'W'].BLK_HOME.astype(float).dropna().sum()
blk_winner_away = extraction_df[extraction_df.WL_HOME == 'L'].BLK_AWAY.astype(float).dropna().sum()
blk_winner = (blk_winner_home + blk_winner_away)/len(extraction_df)
print('Winners average blocks:', blk_winner)
blk_loser_home = extraction_df[extraction_df.WL_HOME == 'L'].BLK_HOME.astype(float).dropna().sum()
blk_loser_away = extraction_df[extraction_df.WL_HOME == 'W'].BLK_AWAY.astype(float).dropna().sum()
blk_loser = (blk_loser_home + blk_loser_away)/len(extraction_df)
print('Losers average blocks', blk_loser)
reb_winner_home = extraction_df[extraction_df.WL_HOME == 'W'].REB_HOME.astype(float).dropna().sum()
reb_winner_away = extraction_df[extraction_df.WL_HOME == 'L'].REB_AWAY.astype(float).dropna().sum()
reb_winner = (reb_winner_home + reb_winner_away)/len(extraction_df)
print('Winners average rebounds:', reb_winner)
reb_loser_home = extraction_df[extraction_df.WL_HOME == 'L'].REB_HOME.astype(float).dropna().sum()
reb_loser_away = extraction_df[extraction_df.WL_HOME == 'W'].REB_AWAY.astype(float).dropna().sum()
reb_loser = (reb_loser_home + reb_loser_away)/len(extraction_df)
print('Losers average rebounds:', reb_loser)
tov_winner_home = extraction_df[extraction_df.WL_HOME == 'W'].TOV_HOME.astype(float).dropna().sum()
tov_winner_away = extraction_df[extraction_df.WL_HOME == 'L'].TOV_AWAY.astype(float).dropna().sum()
tov_winner = (tov_winner_home + tov_winner_away)/len(extraction_df)
print('Winners average turnovers:', tov_winner)
tov_loser_home = extraction_df[extraction_df.WL_HOME == 'L'].TOV_HOME.astype(float).dropna().sum()
tov_loser_away = extraction_df[extraction_df.WL_HOME == 'W'].TOV_AWAY.astype(float).dropna().sum()
tov_loser = (tov_loser_home + tov_loser_away)/len(extraction_df)
print('Losers average turnovers:', tov_loser)
stl_winner_home = extraction_df[extraction_df.WL_HOME == 'W'].STL_HOME.astype(float).dropna().sum()
stl_winner_away = extraction_df[extraction_df.WL_HOME == 'L'].STL_AWAY.astype(float).dropna().sum()
stl_winner = (stl_winner_home + stl_winner_away)/len(extraction_df)
print('Winners average steals:', stl_winner)
stl_loser_home = extraction_df[extraction_df.WL_HOME == 'L'].STL_HOME.astype(float).dropna().sum()
stl_loser_away = extraction_df[extraction_df.WL_HOME == 'W'].STL_AWAY.astype(float).dropna().sum()
stl_loser = (stl_loser_home + stl_loser_away)/len(extraction_df)
print('Losers average steals:', stl_loser)
pf_winner_home = extraction_df[extraction_df.WL_HOME == 'W'].PF_HOME.astype(float).dropna().sum()
pf_winner_away = extraction_df[extraction_df.WL_HOME == 'L'].PF_AWAY.astype(float).dropna().sum()
pf_winner = (pf_winner_home + pf_winner_away)/len(extraction_df)
print('Winners average personal fouls:', pf_winner)
pf_loser_home = extraction_df[extraction_df.WL_HOME == 'L'].PF_HOME.astype(float).dropna().sum()
pf_loser_away = extraction_df[extraction_df.WL_HOME == 'W'].PF_AWAY.astype(float).dropna().sum()
pf_loser = (pf_loser_home + pf_loser_away)/len(extraction_df)
print('Losers average personal fouls:', pf_loser)
blk_dif = round((blk_winner - blk_loser), 1)
reb_dif = round((reb_winner - reb_loser), 1)
tov_dif = round((tov_loser - tov_winner), 1)
stl_dif = round((stl_winner - stl_loser), 1)
pf_dif = round((pf_loser - pf_winner), 1)
print('The winner averages:', blk_dif, 'more blocks,', reb_dif, 'more rebounds, ', tov_dif, 'fewer turnovers, ', stl_dif, 'more steals, ', pf_dif, 'fewer personal fouls.')