import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
# Load the salary data set (expected in the current working directory).
dataset = pd.read_csv('salarios.csv')
# Interactive-session previews: as bare expressions they display nothing
# when this file is executed as a plain script.
dataset.head(5)
dataset.shape
# Features are every column except the last; the target is the last column.
# Selecting the target with -1 keeps it consistent with the `:-1` feature
# slice, so the target can never leak into the features if the CSV has more
# than two columns.
x = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values
# Hold out 20% of the rows for testing; a fixed random_state makes the
# shuffle (and therefore the split) reproducible between runs.
X_train, X_test, Y_train, Y_test = train_test_split(x, y, test_size=0.2, random_state=0)
X_train
X_test
# Ordinary least-squares fit on the training portion only.
regressor = LinearRegression()
regressor.fit(X_train, Y_train)
def _plot_regression(points_x, points_y, point_color, model, line_x):
    """Scatter the given points and overlay the model's fitted line.

    points_x, points_y: coordinates of the observations to scatter.
    point_color: matplotlib color used for the scattered observations.
    model: fitted estimator exposing ``predict``.
    line_x: feature values at which the fitted line is evaluated and drawn.
    """
    plt.scatter(points_x, points_y, color=point_color)
    plt.plot(line_x, model.predict(line_x), color='black')
    plt.title('Salario vs Experiencia')
    plt.xlabel('Experiencia')
    plt.ylabel('Salario')
    plt.show()


# Training observations (blue) against the fitted line.
_plot_regression(X_train, Y_train, 'blue', regressor, X_train)
# Held-out observations (red) against the same fitted line; the line is
# still drawn over the training inputs, exactly as before.
_plot_regression(X_test, Y_test, 'red', regressor, X_train)
# R^2 on the held-out data. Printed because a bare expression is silently
# discarded when the file runs as a script.
simple_test_r2 = regressor.score(X_test, Y_test)
print('R2 test (Experiencia):', simple_test_r2)
# Synthetic categorical feature: assign every row a random country code.
paises = ['VZ','MX','CO','BR','AR','US']
# Size the column from the data set instead of a hard-coded 30 so the
# assignment cannot fail with a length mismatch on a different CSV, and draw
# from a seeded generator so the results are reproducible (the splits already
# use random_state=0).
country_rng = np.random.RandomState(0)
new_column = country_rng.choice(paises, size=len(dataset))
dataset['Pais'] = new_column
# pd.factorize maps each distinct country code to an integer label.
# NOTE(review): integer labels impose an arbitrary order on the countries for
# a linear model; one-hot encoding may be more appropriate - confirm intent.
dataset['Paises'] = pd.factorize(dataset['Pais'])[0]
x = dataset[['Aexperiencia','Paises']]
y = dataset['Salario']
# NOTE(review): test_size=0.6 leaves only 40% of the rows for training,
# unlike the 0.2 used for the single-feature model - confirm this is intended.
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.6, random_state=0)
# Refit the same estimator object on the two-feature training data.
regressor.fit(X_train, y_train)
# R^2 on the held-out data. Printed because a bare expression is silently
# discarded when the file runs as a script.
multi_test_r2 = regressor.score(X_test, y_test)
print('R2 test (Experiencia + Pais):', multi_test_r2)
# 3D view of the two-feature model: observed training salaries (blue squares)
# versus the model's predictions for the same rows (red circles).
fig = plt.figure(dpi = 150)
ax = fig.add_subplot(111, projection='3d')
ax.scatter(X_train['Aexperiencia'],X_train['Paises'],y_train,c='b',marker='s')
ax.scatter(X_train['Aexperiencia'],X_train['Paises'],regressor.predict(X_train),c='r',marker='o')
# The labels are wrapped in $...$, so matplotlib renders them as mathtext.
# NOTE(review): mathtext may collapse the spaces inside these labels - check
# the rendered figure.
ax.set_xlabel('$Años de experiencia$')
ax.set_ylabel('$Pais Normalizado$')
ax.set_zlabel('$Salario$')
# Without an explicit show() the figure is never displayed when the file is
# run as a script (the earlier 2D plots already call show()).
plt.show()