ELEC 478 HW 1

import sklearn import numpy as np from numpy.linalg import inv from numpy.linalg import pinv import matplotlib.pyplot as plt from sklearn.preprocessing import scale import random from sklearn.pipeline import Pipeline from sklearn.preprocessing import PolynomialFeatures import pandas as pd import seaborn as sns from sklearn.datasets import load_boston from sklearn.linear_model import Ridge, LinearRegression, Lasso, ElasticNet import itertools import time import statsmodels.api as sm import matplotlib.pyplot as plt import math from sklearn.metrics import mean_squared_error as mse

# Describing data boston_dataset = load_boston() print(boston_dataset.keys()) print(boston_dataset.DESCR)

boston = pd.DataFrame(boston_dataset.data, columns=boston_dataset.feature_names) boston['MEDV'] = boston_dataset.target boston

boston.isnull().sum()

sns.set(rc={'figure.figsize':(11.7,8.27)}) sns.distplot(boston['MEDV'], bins=30) plt.show()

correlation_matrix = boston.corr().round(2) # annot = True to print the values inside the square sns.heatmap(data=correlation_matrix, annot=True, cmap="YlGnBu")

plt.figure(figsize=(20, 5)) features = ['LSTAT', 'RM'] target = boston['MEDV'] for i, col in enumerate(features): plt.subplot(1, len(features) , i+1) x = boston[col] y = target plt.scatter(x, y, marker='o') plt.title(col) plt.xlabel(col) plt.ylabel('MEDV')

boston_dataset = load_boston() boston = pd.DataFrame(boston_dataset.data, columns=boston_dataset.feature_names) boston['MEDV'] = boston_dataset.target

y = boston["MEDV"] X = boston.drop(["MEDV"], axis = 1)

# This function is to be used for best subsets and step-wise approaches. def processSubset(features): # Fit model on features and calculate RSS selected = sm.OLS(y,X[list(features)]) regr = selected.fit() RSS = ((regr.predict(X[list(features)]) - y) ** 2).sum() return {"Selected":regr, "RSS":RSS}

def bestSubsets(k): results = [] for combo in itertools.combinations(X.columns, k): results.append(processSubset(combo)) # Put in dataframe results = pd.DataFrame(results) # Choose the model with the lowest RSS best_result = results.loc[results['RSS'].argmin()] # Return the best model return best_result

# We see the best subsets up to 13 predictors models_best = pd.DataFrame(columns=["RSS", "Selected"]) for i in range(1,len(X.columns)+1): t0 = time.time() models_best.loc[i] = bestSubsets(i) t1 = time.time() print(f"Finished {i} features in {t1-t0} seconds.")

print(models_best.loc[3, "Selected"].summary())

y = boston["MEDV"] X = boston.drop(["MEDV"], axis = 1)

def forward(predictors): # Pull out predictors we still need to process remaining = [p for p in X.columns if p not in predictors] results = [] for p in remaining: results.append(processSubset(predictors+[p])) # Wrap everything up in a nice dataframe results = pd.DataFrame(results) # Choose the model with the highest RSS best_result = results.loc[results['RSS'].argmin()] # Return the best result return best_result

models_fwd = pd.DataFrame(columns=["RSS", "Selected"]) predictors = [] for i in range(1,len(X.columns)+1): t0 = time.time() models_fwd.loc[i] = forward(predictors) predictors = models_fwd.loc[i]["Selected"].model.exog_names t1 = time.time() print(f"Finished {i} predictors in {t1-t0} seconds.")

print(models_fwd.loc[3, "Selected"].summary())

y = boston["MEDV"] X = boston.drop(["MEDV"], axis = 1)

def backward(predictors): results = [] for combo in itertools.combinations(predictors, len(predictors)-1): results.append(processSubset(combo)) # put results in a dataframe results = pd.DataFrame(results) # Choose the model with the highest RSS best_result = results.loc[results['RSS'].argmin()] # Return the best result return best_result

models_bwd = pd.DataFrame(columns=["RSS", "Selected"], index = range(1,len(X.columns))) predictors = X.columns while(len(predictors) > 1): t0 = time.time() models_bwd.loc[len(predictors)-1] = backward(predictors) predictors = models_bwd.loc[len(predictors)-1]["Selected"].model.exog_names t1 = time.time() print(f"Down to {len(predictors)} predictors in {t1-t0} seconds.")

print(models_bwd.loc[3, "Selected"].summary())

y = boston["MEDV"] X = boston.drop(["MEDV"], axis = 1)

lasso = Lasso(alpha=0.1, normalize=True) # sets all but 3 features to 0 lasso.fit(X, y) lasso_coeffs = lasso.coef_ # Put in dataframe lasso_coeffs = pd.DataFrame(lasso_coeffs, index=X.columns) lasso_coeffs = lasso_coeffs.transpose() # Drop zeros lasso_coeffs.loc[:, (lasso_coeffs != 0).any(axis=0)]

y = boston["MEDV"] X = boston.drop(["MEDV"], axis = 1)

enet = ElasticNet(alpha=0.4, l1_ratio=0.5, normalize=True) # set all but 3 features to 0 enet.fit(X, y) enet_coeffs = enet.coef_ # Put in dataframe enet_coeffs = pd.DataFrame(enet_coeffs, index=X.columns) enet_coeffs = enet_coeffs.transpose() # Drop zeros enet_coeffs.loc[:, (enet_coeffs != 0).any(axis=0)]

y = boston["MEDV"] X = boston.drop(["MEDV"], axis = 1)

# ############################################################################# # Compute paths n_alphas = 200 alphas = np.logspace(-5,2, n_alphas) coefs = [] for a in alphas: # Evaluate the models using crossvalidation lasso = Lasso(alpha=a) lasso.fit(X, y) coefs.append(lasso.coef_) # ############################################################################# # Display results plt.figure(figsize=[15,10]) ax = plt.gca() ax.plot(alphas, coefs) ax.set_xscale('log') ax.set_xlim(ax.get_xlim()[::-1]) # reverse axis plt.xlabel('lambda') plt.ylabel('weights') plt.title('Lasso coefficients as a function of the regularization') plt.axis('tight') plt.show()

y = boston["MEDV"] X = boston.drop(["MEDV"], axis = 1)

# ############################################################################# # Compute paths n_alphas = 200 alphas = np.logspace(-5,2, n_alphas) for l1 in [0.2]: coefs = [] for a in alphas: # Evaluate the models using crossvalidation enet = ElasticNet(alpha=a, l1_ratio=l1) enet.fit(X, y) coefs.append(enet.coef_) # ############################################################################# # Display results plt.figure(figsize=[15,10]) ax = plt.gca() ax.plot(alphas, coefs) ax.set_xscale('log') ax.set_xlim(ax.get_xlim()[::-1]) # reverse axis plt.xlabel('lambda') plt.ylabel('weights') plt.title(f'Elastic net coefficients as a function of the regularization l1_ratio = {l1}') plt.axis('tight') plt.show()

# ############################################################################# # Compute paths n_alphas = 200 alphas = np.logspace(-5,2, n_alphas) for l1 in [0.8]: coefs = [] for a in alphas: # Evaluate the models using crossvalidation enet = ElasticNet(alpha=a, l1_ratio=l1) enet.fit(X, y) coefs.append(enet.coef_) # ############################################################################# # Display results plt.figure(figsize=[15,10]) ax = plt.gca() ax.plot(alphas, coefs) ax.set_xscale('log') ax.set_xlim(ax.get_xlim()[::-1]) # reverse axis plt.xlabel('lambda') plt.ylabel('weights') plt.title(f'Elastic net coefficients as a function of the regularization l1_ratio = {l1}') plt.axis('tight') plt.show()

y = boston["MEDV"] X = boston.drop(["MEDV"], axis = 1)

# ############################################################################# # Compute paths n_alphas = 200 alphas = np.logspace(-3,7, n_alphas) coefs = [] for a in alphas: # Evaluate the models using crossvalidation ridge = Ridge(alpha=a) ridge.fit(X, y) coefs.append(ridge.coef_) # ############################################################################# # Display results plt.figure(figsize=[15,10]) ax = plt.gca() ax.plot(alphas, coefs) ax.set_xscale('log') ax.set_xlim(ax.get_xlim()[::-1]) # reverse axis plt.xlabel('lambda') plt.ylabel('weights') plt.title('Ridge coefficients as a function of the regularization') plt.axis('tight') plt.show()

''' 2) a) ''' boston_dataset = load_boston() X = boston_dataset.data.copy() y = boston_dataset.target.copy() lr_intercept = LinearRegression() lr_intercept.fit(X,y) print(lr_intercept.coef_) mean_X = X.mean(axis = 0) mean_y = y.mean() X_centered = boston_dataset.data.copy() y_centered = boston_dataset.target.copy() for i in range(X.shape[0]): X_centered[i] = X_centered[i] - mean_X for i in range(y.shape[0]): y_centered[i] = y_centered[i] - (mean_y) lr_centered = LinearRegression(fit_intercept=False) lr_centered.fit(X_centered, y_centered) print(lr_centered.coef_) col1 = np.ones(X.shape[0])[np.newaxis].T X_1 = np.concatenate((col1, X), 1) y_1 = y.copy() lr_1 = LinearRegression(fit_intercept=False) lr_1.fit(X_1, y_1) print(lr_1.coef_[1:]) # COMPARE THEM print(lr_intercept.coef_ == lr_centered.coef_) print([math.isclose(lr_centered.coef_[x], lr_1.coef_[x+1]) for x in range(len(lr_centered.coef_))])

''' 2.b) ''' y = boston["MEDV"] X = boston.drop(["MEDV"], axis = 1) assert(10<len(X.columns)) y = boston["MEDV"].head(10) X = boston.drop(["MEDV"], axis = 1).head(10) lreg = LinearRegression() lreg.fit(X,y) print(lreg.score(X,y)) y = boston["MEDV"] X = boston.drop(["MEDV"], axis = 1)

''' 2.c) ''' y = boston["MEDV"] X = boston.drop(["MEDV"], axis = 1) # Split data 50-50 train and test split = X.shape[0]//2 X_train = X.loc[:split,:] y_train = y.loc[:split] X_test = X.loc[split:,:] y_test = y.loc[split:] lreg = LinearRegression() lreg.fit(X_train, y_train) mse_0 = mse(lreg.predict(X_test), y_test) print(f"Linear regression MSE is {mse_0}") alphas = [0.1,1,10,100,1000, 10000, 10000000] found = False for a in alphas: ridge = Ridge(alpha=a) ridge.fit(X_train, y_train) mse_1 = mse(ridge.predict(X_test), y_test) if mse_1<mse_0: print(f"Lambda is {a}, mse is {mse_1} for ridge") found = True print("Found better ridge?", found)