I will be using a dataset I found on Kaggle to predict the number of receptions a building has, based on certain features.
Dataset can be found here xxxx
x
# importing the basics for this project
import pandas as pd
# Location of the London housing CSV, resolved relative to the working directory.
file_path = "./London.csv"
# Parse the CSV into a pandas DataFrame.
london_dataset = pd.read_csv(filepath_or_buffer=file_path)
# Summary statistics of the data; shown when this is the last expression of a
# notebook cell, otherwise the returned frame is simply discarded.
london_dataset.describe()
# Inputs: the columns the model is allowed to look at when predicting.
features = [
    "Price",
    "Area in sq ft",
    "No. of Bedrooms",
]
X = london_dataset[features]
# Target: the number of receptions the model will try to predict for each row.
y = london_dataset["No. of Receptions"]
# from sklearn.tree import DecisionTreeRegressor
# import numpy as np
# model = DecisionTreeRegressor(random_state=1)
# model.fit(X,y)
# # Making a test house to see if prediction is correct.
# test_prediction = np.array([[650000,814,2]])
# model.predict(test_prediction)
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor

# Split features and target into training and validation subsets; the fixed
# random_state makes the split reproducible from run to run.
train_X, val_X, train_y, val_y = train_test_split(X, y, random_state=1)

# Fit a decision tree regressor on the training subset only, so the validation
# rows stay unseen. random_state=1 keeps the fitted tree reproducible.
london_model = DecisionTreeRegressor(random_state=1)
london_model.fit(X=train_X, y=train_y)
from sklearn.metrics import mean_absolute_error

# Predict the number of receptions for every row held out for validation.
val_prediction = london_model.predict(val_X)

# Eyeball the first four predictions against the true values. val_y carries
# the shuffled row labels from the split, so select by position explicitly
# with .iloc instead of relying on how plain [] interprets an integer slice.
print(val_prediction[:4])
print(val_y.iloc[:4])

# Mean absolute error on the validation set. scikit-learn metrics take
# (y_true, y_pred) in that order; MAE is symmetric so the value is unchanged,
# but the documented order stays correct if an asymmetric metric is used later.
model_mae = mean_absolute_error(val_y, val_prediction)
print(model_mae)