# importing relevant libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
# Load the real-estate dataset; the path is resolved against the current
# working directory.
data = pd.read_csv('real_estate.csv')
data.head()  # notebook-style preview; shows nothing when run as a plain script

# Predictors ('size', 'year') and regression target ('price').
x = data[['size', 'year']]
y = data['price']

# Hold out 20% of the rows for testing. A fixed random_state makes the split,
# and therefore every score/error reported further down, reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42
)
from sklearn.preprocessing import StandardScaler

# Standardize the predictors. The scaling parameters are learned from the
# training split only; the test split is then transformed with those same
# learned parameters rather than being fitted separately.
scaler = StandardScaler().fit(x_train)
x_train = scaler.transform(x_train)
x_test = scaler.transform(x_test)
# Fit a linear regression on the standardized training predictors.
reg = LinearRegression().fit(x_train, y_train)

# Score on the held-out split (the value is only displayed in a notebook).
reg.score(x_test, y_test)

# Report the coefficients and the intercept of the trained model.
for label, value in (('Coefficient:', reg.coef_), ('Intercept:', reg.intercept_)):
    print(label, value)
## Prepare the data for visualization
# Build a 100 x 100 grid spanning the observed range of each predictor.
size_axis = np.linspace(data['size'].min(), data['size'].max(), 100)
year_axis = np.linspace(data['year'].min(), data['year'].max(), 100)
x_surf, y_surf = np.meshgrid(size_axis, year_axis)

# Flatten the grid into one row per (size, year) point, using the same column
# names and order as the training predictors.
onlyX = pd.DataFrame({'size': x_surf.ravel(), 'year': y_surf.ravel()})

# `reg` was fitted on standardized predictors, so the raw grid has to pass
# through the already-fitted scaler before prediction; feeding it unscaled
# values would place the surface far away from the data.
fittedY = reg.predict(scaler.transform(onlyX))

## Keep the predicted result as an array (reshaped to the grid when plotting)
fittedY = np.asarray(fittedY)
fittedY
# Visualize the multiple linear regression: observed points plus fitted surface.
# NOTE(review): Axes3D is not referenced by name below; presumably the import
# is kept so the '3d' projection is available on older Matplotlib — confirm.
from mpl_toolkits.mplot3d import Axes3D

# The figure size is set when the figure is created.
fig = plt.figure(figsize=(20, 10))
ax = fig.add_subplot(111, projection='3d')

# Observed data as semi-transparent red markers.
ax.scatter(
    data['size'],
    data['year'],
    data['price'],
    c='red',
    marker='o',
    alpha=0.5,
)

# Model predictions, reshaped from the flat vector back onto the grid.
surface_z = fittedY.reshape(x_surf.shape)
ax.plot_surface(x_surf, y_surf, surface_z, color='b', alpha=0.3)

ax.set_xlabel('Size')
ax.set_ylabel('Year')
ax.set_zlabel('Price')
plt.show()
# code for visualizing SLRM
#plt.scatter(data['x'], data['y'])
#plt.plot(data['x'], reg.predict(x))
import math

# Root mean squared error of the linear model on the training split.
rmse_train = math.sqrt(mean_squared_error(y_train, reg.predict(x_train)))
print('RMSE on train dataset : ', rmse_train)

# Root mean squared error of the linear model on the testing split.
rmse_test = math.sqrt(mean_squared_error(y_test, reg.predict(x_test)))
print('RMSE on test dataset : ', rmse_test)
from sklearn.preprocessing import PolynomialFeatures

# Expand the raw predictors into polynomial terms up to degree 4.
poly = PolynomialFeatures(degree=4)
x_poly = poly.fit_transform(x)
# `poly` must stay fitted on the raw predictors `x`: refitting it on its own
# output (`x_poly`) would make it expect the expanded feature count, so a
# later `poly.transform(...)` on new raw data would fail.

# Linear regression on the expanded features, i.e. a polynomial regression.
lin2 = LinearRegression()
lin2.fit(x_poly, y)
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix
# Load the bank dataset; the path is resolved against the current working
# directory.
bank_data = pd.read_csv('Bank_data.csv')
bank_data.head()  # notebook-style preview; shows nothing when run as a script

# Removes the index column ('Unnamed: 0' — presumably the row index that was
# written out when the CSV was saved).
b_data = bank_data.drop(['Unnamed: 0'], axis=1)

# We use the map function to change any 'yes' values to 1 and 'no' values to 0.
# Any other value in the column would become NaN.
b_data['y'] = b_data['y'].map({'yes': 1, 'no': 0})
b_data

# Predictors and binary target for the classifiers below.
X = b_data[['interest_rate', 'credit', 'march', 'previous', 'duration']]
Y = b_data['y']

# Hold out 20% of the rows for testing. A fixed random_state makes the split,
# and therefore the reported accuracies, reproducible between runs.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=42
)
# Fit a logistic regression classifier on the training split.
model = LogisticRegression().fit(X_train, Y_train)

# code for visualizing simple logistic regression
# plt.scatter(x, y)
# plt.plot(x, model.predict(x))

# Mean accuracy on the held-out split (only displayed in a notebook).
model.score(X_test, Y_test)

# Confusion matrix of true labels against predicted labels on the test split.
Y_test_pred = model.predict(X_test)
confusion_matrix(Y_test, Y_test_pred)
from sklearn.metrics import mean_absolute_error

# Mean absolute error of the linear regression model (`reg`), first on the
# training split and then on the testing split.
mae_train = mean_absolute_error(y_train, reg.predict(x_train))
print(mae_train)
mae_test = mean_absolute_error(y_test, reg.predict(x_test))
print(mae_test)
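# The accuracy of the multiple linear regression model above increased after STANDARDIZING the predictors.
# NOTE(review): ordinary least squares predictions are unaffected by rescaling the predictors, so this
# difference may come from a different random train/test split — compare runs on the same split to confirm.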
## ENSEMBLE METHOD (GRADIENT BOOSTING) ON THE SAME DATA AS THE LOGISTIC REGRESSION MODEL ABOVE
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score

# Create the object of the GradientBoosting Classifier model
mod = GradientBoostingClassifier(n_estimators=100, max_depth=5)

# Fit the model with the training data. The predictors are passed as-is:
# casting them to int here would truncate any fractional values, and the model
# would then be trained on different feature values than the un-cast
# X_train / X_test it is asked to predict on below.
mod.fit(X_train, Y_train.astype('int'))

# Predict the target on the train dataset
predict_train = mod.predict(X_train)

# Accuracy score on train dataset
accuracy_train = accuracy_score(Y_train.astype('int'), predict_train)
print('\naccuracy_score on train dataset : ', accuracy_train)

# Predict the target on the test dataset
predict_test = mod.predict(X_test)

# Accuracy score on test dataset
accuracy_test = accuracy_score(Y_test.astype('int'), predict_test)
print('\naccuracy_score on test dataset : ', accuracy_test)