import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
# Load the salary dataset and preview the first rows.
dataset = pd.read_csv('salarios_dc1975e5-393f-4b89-9b04-a2b48ce17388.csv')
print(dataset.head())

# Simple (one-feature) regression: predict salary from years of experience.
# `x` and `y` must exist before the split; previously they were only assigned
# much further down, so running the script top to bottom raised NameError.
# Assumes the CSV has 'Aexperiencia' and 'Salario' columns, as used by the
# two-feature model later in this file.
x = dataset[['Aexperiencia']]  # double brackets keep x 2-D, as scikit-learn expects
y = dataset['Salario']

# Hold out 20% of the rows for testing; fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=0
)
print(X_test)

# Fit ordinary least squares and report R^2 on the training data.
regressor = LinearRegression()
regressor.fit(X_train, y_train)
print(regressor.score(X_train, y_train))
# Training set: observed points (blue) against the fitted regression line (black).
viz_train = plt
train_predictions = regressor.predict(X_train)
viz_train.scatter(X_train, y_train, color='blue')
viz_train.plot(X_train, train_predictions, color='black')
# Axis labels first, then the title; the order among these calls is irrelevant.
viz_train.xlabel('Experiencia en años')
viz_train.ylabel('Salario')
viz_train.title('Salario vs Experiencia')
viz_train.show()
# Test set: held-out points (blue) against the model's predictions for them (black).
# NOTE: the alias keeps its original name `viz_train` even though this is the
# test-set figure.
viz_train = plt
test_predictions = regressor.predict(X_test)
viz_train.scatter(X_test, y_test, color='blue')
viz_train.plot(X_test, test_predictions, color='black')
# Axis labels first, then the title; the order among these calls is irrelevant.
viz_train.xlabel('Experiencia en años')
viz_train.ylabel('Salario')
viz_train.title('Salario vs Experiencia')
viz_train.show()
# Second experiment: add a synthetic "country" feature and refit with two inputs.
dataset = pd.read_csv('salarios_dc1975e5-393f-4b89-9b04-a2b48ce17388.csv')
paises = ['VZ', 'MX', 'CO', 'BR', 'AR', 'US']

# Draw one random country per row. The count comes from the data instead of a
# hard-coded 30, which made the column assignment fail for any other row count.
# A seeded generator keeps the run reproducible, matching random_state=0 below.
rng = np.random.default_rng(0)
new_column = rng.choice(paises, size=len(dataset)).tolist()
dataset['Pais'] = new_column

# Encode each country as an integer code (in order of first appearance).
# NOTE(review): these codes are arbitrary labels, yet a linear model treats
# them as ordered magnitudes -- consider one-hot encoding if this matters.
dataset['NormPaises'] = pd.factorize(dataset['Pais'])[0]

x = dataset[['Aexperiencia', 'NormPaises']]
y = dataset['Salario']
X_train, X_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=0
)

# Refit the existing estimator on the two-feature data and report R^2 on the
# held-out rows.
regressor.fit(X_train, y_train)
print(regressor.score(X_test, y_test))
# 3D view of the two-feature model: actual training salaries (green squares)
# and predicted salaries for the test rows (red circles).
fig = plt.figure(dpi=150)
ax = fig.add_subplot(111, projection='3d')
ax.scatter(
    X_train['Aexperiencia'], X_train['NormPaises'], y_train,
    c='green', marker='s',
)
ax.scatter(
    X_test['Aexperiencia'], X_test['NormPaises'], regressor.predict(X_test),
    c='r', marker='o',
)
# The labels used to be wrapped in $...$; mathtext drops spaces in math mode,
# so multi-word labels rendered run together. Plain text with an italic font
# keeps the slanted look while preserving the spaces.
ax.set_xlabel('Años de experiencia', fontstyle='italic')
ax.set_ylabel('Pais Normalizado', fontstyle='italic')
ax.set_zlabel('Salario', fontstyle='italic')
# Without an explicit show() the figure never appears when run as a script.
plt.show()