!pip install statsmodels
import statsmodels.api as sm
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import matplotlib as mpl
import seaborn as sns
import sqlite3
# --- Plot defaults ---------------------------------------------------------
# Set the default figure size to 15x5 first, then call seaborn's set();
# the two calls are kept in this order on purpose.
mpl.rc('figure', figsize=(15, 5))
sns.set()

# --- Data load -------------------------------------------------------------
# Connect to the SQLite database file and read every row/column of
# final_property_table into a DataFrame. `conn` is left open here.
conn = sqlite3.connect('/work/Property_DB.db')
first_query = 'select * from final_property_table'
df = pd.read_sql(sql=first_query, con=conn)

# Show the first five rows as the cell's output.
df.head(5)
   PROPERTY_COUNT (int64)   SURBURB (object)
0  1                        Yarraville
1  1                        Templestowe Lower
2  1                        Templestowe Lower
3  1                        Thornbury
4  1                        Thornbury
OLS Regression Results
==============================================================================
Dep. Variable: HOUSE_PRICE R-squared: 0.437
Model: OLS Adj. R-squared: 0.436
Method: Least Squares F-statistic: 510.7
Date: Sat, 05 Feb 2022 Prob (F-statistic): 0.00
Time: 04:44:50 Log-Likelihood: -1.4333e+05
No. Observations: 9895 AIC: 2.867e+05
Df Residuals: 9879 BIC: 2.868e+05
Df Model: 15
Covariance Type: nonrobust
=====================================================================================
coef std err t P>|t| [0.025 0.975]
-------------------------------------------------------------------------------------
const -6.826e+07 2.2e+07 -3.106 0.002 -1.11e+08 -2.52e+07
ROOMS 3.432e+05 7583.241 45.253 0.000 3.28e+05 3.58e+05
DAY -202.5336 1556.205 -0.130 0.896 -3253.012 2847.945
MONTH 1.771e+04 3241.786 5.464 0.000 1.14e+04 2.41e+04
YEAR 3.22e+04 1.09e+04 2.957 0.003 1.09e+04 5.36e+04
QUARTER_DATE -4.105e+04 9347.218 -4.392 0.000 -5.94e+04 -2.27e+04
DISTANCE_FROM_CBD -4.423e+04 905.383 -48.853 0.000 -4.6e+04 -4.25e+04
POSTCODE 1193.2226 52.010 22.942 0.000 1091.273 1295.172
LATTITUDE -1.25e+06 6.68e+04 -18.706 0.000 -1.38e+06 -1.12e+06
LONGTITUDE -3.263e+05 1.74e+04 -18.717 0.000 -3.6e+05 -2.92e+05
BEDROOMS -4.465e+04 8778.056 -5.086 0.000 -6.19e+04 -2.74e+04
BATHROOMS 1.396e+05 9666.347 14.445 0.000 1.21e+05 1.59e+05
CAR_PORTS 4.781e+04 5957.495 8.026 0.000 3.61e+04 5.95e+04
LAND_SIZE 3.4562 1.021 3.385 0.001 1.455 5.458
BUILDING_AREA 392.9760 48.207 8.152 0.000 298.480 487.472
YEAR_BUILT -56.7593 6.173 -9.195 0.000 -68.860 -44.659
==============================================================================
Omnibus: 4505.810 Durbin-Watson: 1.817
Prob(Omnibus): 0.000 Jarque-Bera (JB): 49202.762
Skew: 1.899 Prob(JB): 0.00
Kurtosis: 13.243 Cond. No. 2.19e+07
==============================================================================
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 2.19e+07. This might indicate that there are
strong multicollinearity or other numerical problems.
OLS Regression Results
==============================================================================
Dep. Variable: HOUSE_PRICE R-squared: 0.507
Model: OLS Adj. R-squared: 0.504
Method: Least Squares F-statistic: 157.9
Date: Sat, 05 Feb 2022 Prob (F-statistic): 0.00
Time: 04:44:56 Log-Likelihood: -1.4267e+05
No. Observations: 9895 AIC: 2.855e+05
Df Residuals: 9830 BIC: 2.859e+05
Df Model: 64
Covariance Type: nonrobust
===========================================================================================
coef std err t P>|t| [0.025 0.975]
-------------------------------------------------------------------------------------------
const -1.34e+07 9.83e+06 -1.362 0.173 -3.27e+07 5.88e+06
DAY 1020.0011 1477.012 0.691 0.490 -1875.246 3915.249
MONTH 1.152e+04 3087.719 3.731 0.000 5467.458 1.76e+04
YEAR 1.273e+04 1.04e+04 1.221 0.222 -7710.279 3.32e+04
POSTCODE 1154.3155 52.365 22.044 0.000 1051.670 1256.961
LATTITUDE -1.308e+06 6.41e+04 -20.404 0.000 -1.43e+06 -1.18e+06
LONGTITUDE -3.411e+05 1.67e+04 -20.395 0.000 -3.74e+05 -3.08e+05
YEAR_BUILT_1800-1899 -3.113e+06 2.46e+06 -1.267 0.205 -7.93e+06 1.7e+06
YEAR_BUILT_1900-1999 -3.394e+06 2.46e+06 -1.381 0.167 -8.21e+06 1.42e+06
YEAR_BUILT_2000-2017 -3.537e+06 2.46e+06 -1.439 0.150 -8.35e+06 1.28e+06
YEAR_BUILT_unknown -3.353e+06 2.46e+06 -1.363 0.173 -8.17e+06 1.47e+06
DISTANCE_0-10 -1.9e+06 1.96e+06 -0.968 0.333 -5.75e+06 1.95e+06
DISTANCE_11-20 -2.265e+06 1.96e+06 -1.153 0.249 -6.12e+06 1.59e+06
DISTANCE_21-30 -2.662e+06 1.97e+06 -1.353 0.176 -6.52e+06 1.2e+06
DISTANCE_31-40 -3.192e+06 1.97e+06 -1.622 0.105 -7.05e+06 6.66e+05
DISTANCE_41-48 -3.376e+06 1.97e+06 -1.714 0.087 -7.24e+06 4.85e+05
TYPE_HOUSE -4.286e+06 3.28e+06 -1.308 0.191 -1.07e+07 2.14e+06
TYPE_TOWNHOUSE -4.466e+06 3.28e+06 -1.363 0.173 -1.09e+07 1.96e+06
TYPE_UNIT -4.644e+06 3.28e+06 -1.417 0.157 -1.11e+07 1.78e+06
ROOMS-1 -2.122e+06 1.05e+06 -2.018 0.044 -4.18e+06 -6.03e+04
ROOMS-2 -1.837e+06 1.05e+06 -1.749 0.080 -3.9e+06 2.22e+05
ROOMS-3 -1.644e+06 1.05e+06 -1.566 0.117 -3.7e+06 4.14e+05
ROOMS-4 -1.31e+06 1.05e+06 -1.249 0.212 -3.37e+06 7.45e+05
ROOMS-5 -1.105e+06 1.05e+06 -1.053 0.292 -3.16e+06 9.51e+05
ROOMS-6 -1.041e+06 1.05e+06 -0.989 0.322 -3.1e+06 1.02e+06
ROOMS-7 -4.328e+05 4.93e+05 -0.878 0.380 -1.4e+06 5.34e+05
ROOMS-8 -7.384e+05 1.06e+06 -0.697 0.486 -2.81e+06 1.34e+06
ROOMS-9 -1.295e+06 1.08e+06 -1.200 0.230 -3.41e+06 8.2e+05
ROOMS-12 -1.872e+06 1.05e+06 -1.781 0.075 -3.93e+06 1.88e+05
QUARTER_1 -3.289e+06 2.46e+06 -1.338 0.181 -8.11e+06 1.53e+06
QUARTER_2 -3.342e+06 2.46e+06 -1.361 0.174 -8.16e+06 1.47e+06
QUARTER_3 -3.36e+06 2.46e+06 -1.366 0.172 -8.18e+06 1.46e+06
QUARTER_4 -3.405e+06 2.46e+06 -1.385 0.166 -8.22e+06 1.42e+06
BEDROOMS_0 2.59e+05 1.61e+05 1.613 0.107 -5.58e+04 5.74e+05
BEDROOMS_1 3.592e+05 1.18e+05 3.050 0.002 1.28e+05 5.9e+05
BEDROOMS_2 3.563e+05 1.1e+05 3.236 0.001 1.4e+05 5.72e+05
BEDROOMS_3 3.416e+05 1.08e+05 3.153 0.002 1.29e+05 5.54e+05
BEDROOMS_4 1.73e+05 1.08e+05 1.602 0.109 -3.87e+04 3.85e+05
BEDROOMS_5 8.887e+04 1.12e+05 0.792 0.429 -1.31e+05 3.09e+05
BEDROOMS_6 -9.818e+04 1.53e+05 -0.643 0.521 -3.98e+05 2.01e+05
BEDROOMS_7 -4.328e+05 4.93e+05 -0.878 0.380 -1.4e+06 5.34e+05
BEDROOMS_8 -3.083e+05 3.09e+05 -0.999 0.318 -9.13e+05 2.96e+05
BEDROOMS_9 1.147e+04 2.38e+05 0.048 0.962 -4.55e+05 4.78e+05
BEDROOMS_10 3.549e+05 5.31e+05 0.668 0.504 -6.86e+05 1.4e+06
BEDROOMS_20 5.676e+05 4.35e+05 1.304 0.192 -2.86e+05 1.42e+06
BATHROOMS_0 -4.272e+05 1.66e+05 -2.566 0.010 -7.53e+05 -1.01e+05
BATHROOMS_1 -3.719e+05 1.15e+05 -3.246 0.001 -5.96e+05 -1.47e+05
BATHROOMS_2 -2.499e+05 1.15e+05 -2.177 0.029 -4.75e+05 -2.49e+04
BATHROOMS_3 -1.637e+04 1.16e+05 -0.141 0.888 -2.43e+05 2.11e+05
BATHROOMS_4 6.657e+05 1.27e+05 5.252 0.000 4.17e+05 9.14e+05
BATHROOMS_5 3.773e+05 1.45e+05 2.605 0.009 9.34e+04 6.61e+05
BATHROOMS_6 -3.233e+05 4.12e+05 -0.784 0.433 -1.13e+06 4.85e+05
BATHROOMS_7 1.147e+04 2.38e+05 0.048 0.962 -4.55e+05 4.78e+05
BATHROOMS_8 1.351e+05 3.96e+05 0.341 0.733 -6.41e+05 9.12e+05
CAR_PORTS_0 1.082e+04 4.83e+04 0.224 0.823 -8.38e+04 1.05e+05
CAR_PORTS_1 -2.359e+04 4.55e+04 -0.519 0.604 -1.13e+05 6.56e+04
CAR_PORTS_2 4.09e+04 4.56e+04 0.897 0.370 -4.85e+04 1.3e+05
CAR_PORTS_3 1.119e+05 4.93e+04 2.269 0.023 1.52e+04 2.09e+05
CAR_PORTS_4 9.854e+04 5.2e+04 1.894 0.058 -3419.873 2e+05
CAR_PORTS_5 2034.1445 7.79e+04 0.026 0.979 -1.51e+05 1.55e+05
CAR_PORTS_6 1.09e+05 8.43e+04 1.293 0.196 -5.63e+04 2.74e+05
CAR_PORTS_7 4.373e+05 1.75e+05 2.504 0.012 9.49e+04 7.8e+05
CAR_PORTS_8 2.292e+04 2.29e+05 0.100 0.920 -4.26e+05 4.72e+05
CAR_PORTS_9 8.74e+05 4.46e+05 1.958 0.050 -881.215 1.75e+06
CAR_PORTS_10 -2.94e+05 4.46e+05 -0.659 0.510 -1.17e+06 5.81e+05
BUILDING_AREA_0-100 1.044e+05 3.15e+05 0.332 0.740 -5.12e+05 7.21e+05
BUILDING_AREA_101-200 9.15e+04 3.15e+05 0.290 0.772 -5.26e+05 7.09e+05
BUILDING_AREA_201-300 3.366e+05 3.15e+05 1.067 0.286 -2.82e+05 9.55e+05
BUILDING_AREA_301-400 5.275e+05 3.17e+05 1.663 0.096 -9.44e+04 1.15e+06
BUILDING_AREA_401-1000 6.325e+05 3.2e+05 1.974 0.048 4514.569 1.26e+06
BUILDING_AREA_1000_plus 0 0 nan nan 0 0
LAND_SIZE_0-1000 -2.195e+05 1.21e+05 -1.815 0.070 -4.57e+05 1.75e+04
LAND_SIZE_1001-5000 -4.983e+04 1.23e+05 -0.405 0.685 -2.91e+05 1.91e+05
LAND_SIZE_5001-9999 -2.161e+05 1.48e+05 -1.457 0.145 -5.07e+05 7.46e+04
LAND_SIZE_10000_plus 0 0 nan nan 0 0
==============================================================================
Omnibus: 4928.775 Durbin-Watson: 1.811
Prob(Omnibus): 0.000 Jarque-Bera (JB): 66621.081
Skew: 2.058 Prob(JB): 0.00
Kurtosis: 15.027 Cond. No. 1.00e+16
==============================================================================
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The smallest eigenvalue is 1.36e-21. This might indicate that there are
strong multicollinearity problems or that the design matrix is singular.