import pandas as pd
import numpy as np
from sklearn import tree
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
# Load the match results. The statements further down read the columns
# home_score, away_score, home_team, away_team, tournament, country, date,
# neutral and city from this frame.
df = pd.read_csv("results.csv")
def addWinner(hScore, aScore, hTeam=None, aTeam=None):
    """Return a numeric code for the outcome of a match.

    Args:
        hScore: Goals scored by the home team.
        aScore: Goals scored by the away team.
        hTeam: Home team name. Not used by the computation; accepted so the
            signature stays parallel to ``addLoser``.
        aTeam: Away team name. Not used by the computation.

    Returns:
        1 if the home team scored more, 2 if the away team scored more,
        0 otherwise.

    Note:
        Comparisons involving NaN are always False, so a missing score falls
        through to the final branch and is reported as 0, the same code as a
        draw. Filter such rows out beforehand if that is not wanted.
    """
    if hScore > aScore:
        return 1
    if aScore > hScore:
        return 2
    return 0
def addLoser(hScore, aScore, hTeam, aTeam):
    """Return the name of the team that lost a match.

    Args:
        hScore: Goals scored by the home team.
        aScore: Goals scored by the away team.
        hTeam: Home team name.
        aTeam: Away team name.

    Returns:
        ``aTeam`` if the home team scored more, ``hTeam`` if the away team
        scored more, and the string ``"Equality"`` otherwise.

    Note:
        Comparisons involving NaN are always False, so a missing score also
        yields ``"Equality"``.
    """
    if hScore > aScore:
        return aTeam
    if aScore > hScore:
        return hTeam
    return "Equality"
# Rows with a missing score cannot be labelled: NaN compares False against
# everything, so addWinner/addLoser would silently record them as draws.
# This is a no-op when the CSV has no missing scores. .copy() detaches the
# filtered frame so the column assignments below act on an independent object.
df = df.dropna(subset=["home_score", "away_score"]).copy()

# Label each match. "winner" is 1 (home win), 2 (away win) or 0 (draw);
# "loser" is the losing team's name, or "Equality" for a draw.
df["winner"] = df.apply(
    lambda row: addWinner(row["home_score"], row["away_score"], row["home_team"], row["away_team"]),
    axis=1,
)
df["loser"] = df.apply(
    lambda row: addLoser(row["home_score"], row["away_score"], row["home_team"], row["away_team"]),
    axis=1,
)

# Optional experiment: uncomment to train without the first 30000 rows.
# df = df.drop(df.index[:30000], axis="rows")

# Features: everything except the scores, the labels derived from them, and
# the date / neutral / city columns.
# NOTE(review): `input` shadows the builtin of the same name; the name is kept
# because code outside this section may refer to it.
input = df.drop(
    ["winner", "loser", "home_score", "away_score", "date", "neutral", "city"],
    axis="columns",
)
target = df[["winner"]]

# One encoder per categorical column, each kept in its own variable so the
# integer codes can be mapped back to names with inverse_transform.
# NOTE: home and away teams are encoded independently, so the same team may
# receive a different code in home_team_n than in away_team_n.
le_home_team = LabelEncoder()
le_away_team = LabelEncoder()
le_tournament = LabelEncoder()
le_country = LabelEncoder()
input["home_team_n"] = le_home_team.fit_transform(input["home_team"])
input["away_team_n"] = le_away_team.fit_transform(input["away_team"])
input["tournament_n"] = le_tournament.fit_transform(input["tournament"])
input["country_n"] = le_country.fit_transform(input["country"])

# Keep only the encoded columns for training.
input_n = input.drop(["home_team", "away_team", "tournament", "country"], axis="columns")

# Hold out 10% of the matches for evaluation; the fixed seed makes the split
# reproducible.
input_train, input_test, target_train, target_test = train_test_split(
    input_n, target, random_state=1, test_size=0.1
)
print(target.head())

# A shallow tree (depth 3) keeps the plotted model readable.
model = tree.DecisionTreeClassifier(max_depth=3)
model.fit(input_train, target_train)

accuracy = model.score(input_test, target_test)
print("Decision tree test accuracy:", accuracy)

# Pass the column names so the plotted nodes show them instead of x[i].
tree.plot_tree(model, filled=True, feature_names=list(input_n.columns))

# Predict one match given already-encoded feature values, listed in the
# column order of input_n. The model was fitted on a DataFrame, so the sample
# is wrapped in one with the same columns to keep feature names consistent.
# Assumes results.csv has no columns beyond those referenced in this script,
# i.e. input_n has exactly four columns -- TODO confirm.
sample = pd.DataFrame([[7, 95, 57, 69]], columns=input_n.columns)
prediction = model.predict(sample)
print("Predicted outcome (0 = draw, 1 = home win, 2 = away win):", prediction[0])