from zipfile import ZipFile

# Unpack every member of the zipped data set into ./source/.
file_path = 'source/diamonds.csv.zip'
with ZipFile(file_path, 'r') as zip_ref:
    # The extraction must run inside the `with` body so the archive is
    # still open; the context manager closes it afterwards.
    zip_ref.extractall('./source/')
import pandas as pd

# Read the extracted CSV into a DataFrame.
df = pd.read_csv('source/diamonds.csv')
# Preview of the first rows; the result is discarded, so this only shows
# output in an interactive session (notebook / REPL).
df.head()

# Columns used as model inputs, and the column the model should predict.
feature_columns = ['carat', 'depth', 'table', 'x', 'y', 'z']
x = df[feature_columns]
y = df.price
from sklearn.model_selection import train_test_split
from sklearn import linear_model

# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split reproducible between runs.
train_x, test_x, train_y, test_y = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=2,
)

# Fit a linear regression on the training portion only. fit() returns the
# estimator itself, so construction and fitting can be chained.
model = linear_model.LinearRegression().fit(train_x, train_y)
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score

# Evaluate the fitted model on the held-out test rows.
y_pred = model.predict(test_x)
R2 = r2_score(test_y, y_pred)

# For a regressor, model.score() is the coefficient of determination (R^2),
# not a classification accuracy -- it can even be negative -- so the value is
# labelled as R^2. The R2 computed above is reused because model.score() would
# only repeat the same prediction and r2_score computation.
print("R Squared (as percentage): " + str(R2 * 100) + "%")
print("Mean absolute error: {}".format(mean_absolute_error(test_y, y_pred)))
print("Mean squared error: {}".format(mean_squared_error(test_y, y_pred)))
print('R Squared: {}'.format(R2))
# Adjusted R^2 penalises R^2 for the number of predictors in the model:
#     adjusted = 1 - (1 - R^2) * (n - 1) / (n - p - 1)
# where n is the number of evaluated samples and p the number of predictors.
n = test_x.shape[0]
# test_x contains only feature columns (the target lives in test_y), so every
# column is a predictor; subtracting 1 here would undercount p and overstate
# the adjusted score.
p = test_x.shape[1]
adj_rsquared = 1 - (1 - R2) * ((n - 1) / (n - p - 1))
print('Adjusted R Squared: {}'.format(adj_rsquared))