import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn import tree
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn import metrics
# Load the car-evaluation data set. The CSV has no header row, so the column
# names are assigned explicitly.
data = pd.read_csv('car.csv', header=None)
data.columns = ['price', 'maintenance', 'n_doors', 'capacity', 'size_lug', 'safety', 'class']

# Build a decision-tree model to evaluate whether a vehicle is worth buying.
# Every column holds categorical text, so each category is encoded as an
# ordinal integer before training.
ordinal_codes = {
    'price': {'vhigh': 4, 'high': 3, 'med': 2, 'low': 1},
    'maintenance': {'vhigh': 4, 'high': 3, 'med': 2, 'low': 1},
    'n_doors': {'2': 1, '3': 2, '4': 3, '5more': 4},
    'capacity': {'2': 1, '4': 2, 'more': 3},
    'size_lug': {'small': 1, 'med': 2, 'big': 3},
    'safety': {'low': 1, 'med': 2, 'high': 3},
    'class': {'unacc': 1, 'acc': 2, 'good': 3, 'vgood': 4},
}
for column, codes in ordinal_codes.items():
    # Assign the encoded column back to the frame instead of calling
    # `data.<col>.replace(..., inplace=True)`: an in-place method on a column
    # selection is chained assignment, which does not update `data` when
    # pandas Copy-on-Write is active.
    # `astype(str)` tolerates numeric-looking categories ('2', '4') that the
    # CSV parser may have read as integers.
    encoded = data[column].astype(str).map(codes)
    unmapped = encoded.isna()
    if unmapped.any():
        unknown = sorted(set(data.loc[unmapped, column].astype(str)))
        raise ValueError(
            "Unexpected categories in column %r: %s" % (column, unknown)
        )
    data[column] = encoded.astype(int)

# 80% of the rows are used for training, 20% for testing.
dataset = data.values  # numpy.ndarray
X = dataset[:, 0:6]  # all rows, feature columns 0-5
# All rows, column 6 only (the target). The labels are kept as fixed-width
# byte strings (b'1' .. b'4'), as in the original script.
Y = np.asarray(dataset[:, 6], dtype='S6')

x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=0)

# Seed the tree as well as the split so the reported score is reproducible.
tr = tree.DecisionTreeClassifier(max_depth=10, random_state=0)
tr.fit(x_train, y_train)
y_pred = tr.predict(x_test)

# `score` is the classifier's mean accuracy on the held-out test set.
score = tr.score(x_test, y_test)
print("Precision: %0.4f" % (score))