import numpy as np
import pandas as pd
# Read the CSV into a DataFrame.
df = pd.read_csv("/work/Car details v3.csv")

# Notebook-style display expressions: dimensions and first rows.
df.shape
df.head()

# Show which columns hold missing values, discard every row that has one,
# renumber the index from zero, then show the per-column check again.
df.isna().any()
df = df.dropna()
df = df.reset_index(drop=True)
df.isna().any()
def _leading_number(value):
    """Return the first whitespace-separated token of *value* as a float.

    Raises:
        ValueError: if *value* has no tokens or the first token is not a
            valid float literal.
    """
    # split() with no separator ignores leading whitespace and treats any run
    # of spaces/tabs as one delimiter, so " 1248 CC" and "1248\tCC" both
    # parse; split(' ') would produce '' or '1248\tCC' for those inputs.
    tokens = str(value).split(maxsplit=1)
    if not tokens:
        raise ValueError(f"could not convert string to float: {value!r}")
    return float(tokens[0])


def remove_unit(df, colum_name):
    """Strip the trailing unit from every value of a column.

    Each value is converted to ``str`` and its first whitespace-separated
    token is parsed as a float (e.g. ``"1248 CC"`` -> ``1248.0``).

    Args:
        df: DataFrame (or any mapping of column name to iterable).
        colum_name: Name of the column to convert.

    Returns:
        A list of floats, one per row, in row order.

    Raises:
        ValueError: if a value does not start with a parseable number.
    """
    return [_leading_number(value) for value in df[colum_name]]
# Replace the unit-suffixed text in these columns with the leading number.
for unit_column in ('engine', 'mileage', 'max_power'):
    df[unit_column] = remove_unit(df, unit_column)
df.head()

# 'name' and 'torque' are removed from the frame.
df = df.drop(columns=['name', 'torque'])
df.head()

# Inspect the fuel categories, then keep only rows whose fuel is neither
# 'CNG' nor 'LPG'.
df['fuel'].value_counts()
df = df[~df['fuel'].isin(['CNG', 'LPG'])]

# Names of the remaining object-dtype columns.
ColumsToConvert = [name for name, kind in df.dtypes.items() if kind == 'O']
ColumsToConvert
def transform_col(df, Col):
    """One-hot encode the given columns and return the resulting frame.

    Every column named in *Col* is replaced by indicator columns named
    ``<column>_<category>``; the first category of each column is dropped.
    Columns that are not encoded keep their original order and come first,
    followed by the indicator columns in the order the names appear in *Col*.
    The input frame is not modified.

    Args:
        df: Source DataFrame.
        Col: Iterable of column names to encode. A single column name given
            as a string is treated as a one-element list.

    Returns:
        A DataFrame with the encoded columns; *df* itself when *Col* is empty.
    """
    # A bare string would otherwise be iterated character by character.
    columns = [Col] if isinstance(Col, str) else list(Col)
    if not columns:
        return df
    # A single call encodes all columns at once instead of re-concatenating
    # the whole frame once per column.
    return pd.get_dummies(df, columns=columns, drop_first=True)
# One-hot encode the object-dtype columns listed in ColumsToConvert.
df = transform_col(df, ColumsToConvert)
df.columns
df.head()

# Features are every column except the target. The target is selected by
# name, like the feature drop, so a change in column order cannot silently
# substitute a different column.
x = df.drop('selling_price', axis=1)
y = df['selling_price']

# Continue with plain NumPy arrays (`.to_numpy()` is the equivalent spelling).
y = y.values
x = x.values
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# Split BEFORE scaling: 25% of the rows are held out first, so the scaler's
# mean/variance are learned from the training rows only and no test-set
# statistics leak into the training features.
x_train_raw, x_test_raw, Ytrain, Ytest = train_test_split(
    x, y, test_size=0.25, random_state=42
)

scaler = StandardScaler()
Xtrain = scaler.fit_transform(x_train_raw)
# The held-out rows are transformed with the training statistics.
Xtest = scaler.transform(x_test_raw)

# Full feature matrix on the same training-derived scale, kept under its
# original name.
X = scaler.transform(x)
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import validation_curve
from sklearn.metrics import r2_score
ModelList = []

# --- Linear regression: fit on the training split, score on the held-out
# split, then the mean of a 10-fold cross-validation on the training split.
from sklearn.linear_model import LinearRegression
modelLR = LinearRegression().fit(Xtrain, Ytrain)
modelLR.score(Xtest, Ytest)
cross_val_score(LinearRegression(), Xtrain, Ytrain, cv=10).mean()

# --- Decision tree: same three steps.
from sklearn.tree import DecisionTreeRegressor
modelDTR = DecisionTreeRegressor().fit(Xtrain, Ytrain)
modelDTR.score(Xtest, Ytest)
cross_val_score(DecisionTreeRegressor(), Xtrain, Ytrain, cv=10).mean()
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import Ridge

# Search the regularisation strength alpha over 1..199 with 10-fold CV.
modelRi = Ridge()
parametersRidge = {'alpha': range(1, 200)}
RidgeGrid = GridSearchCV(modelRi, parametersRidge, cv=10)
RidgeGrid.fit(Xtrain, Ytrain)
RidgeGrid.score(Xtest, Ytest)
RidgeGrid.best_params_

# Cross-validate a fresh Ridge with the alpha the search just selected,
# instead of a value copied from an earlier run, so this figure always
# matches the search result above.
np.mean(cross_val_score(Ridge(**RidgeGrid.best_params_), Xtrain, Ytrain, cv=10))
from sklearn.neighbors import KNeighborsRegressor

# Hyperparameter grid: weighting scheme, neighbour count 1..9, search
# structure, and Minkowski power p in {1, 2}.
parametersKNN = [{
    'weights': ['uniform', 'distance'],
    'n_neighbors': range(1, 10, 1),
    'algorithm': ['ball_tree', 'kd_tree', 'brute'],
    'p': range(1, 3, 1),
}]
modelKNN = KNeighborsRegressor()
KNNGrid = GridSearchCV(modelKNN, parametersKNN, cv=10)

# Baseline with default hyperparameters.
modelKNN.fit(Xtrain, Ytrain)
# NOTE(review): cv=12 here and below, while the grid search and the other
# models in this file use cv=10 - confirm whether that is intentional.
np.mean(cross_val_score(KNeighborsRegressor(), Xtrain, Ytrain, cv=12))

KNNGrid.fit(Xtrain, Ytrain)
KNNGrid.best_params_

# Cross-validate with the hyperparameters the search just selected rather
# than values copied from an earlier run.
np.mean(cross_val_score(KNeighborsRegressor(**KNNGrid.best_params_), Xtrain, Ytrain, cv=12))
# Each model below follows the same pattern: fit on the training split, score
# on the held-out split, then average a 10-fold cross-validation of a fresh,
# default-configured estimator on the training split.

# --- Stochastic gradient descent regressor
from sklearn.linear_model import SGDRegressor
modelSGD = SGDRegressor().fit(Xtrain, Ytrain)
modelSGD.score(Xtest, Ytest)
cross_val_score(SGDRegressor(), Xtrain, Ytrain, cv=10).mean()

# --- Random forest
from sklearn.ensemble import RandomForestRegressor
modelRFR = RandomForestRegressor().fit(Xtrain, Ytrain)
modelRFR.score(Xtest, Ytest)
cross_val_score(RandomForestRegressor(), Xtrain, Ytrain, cv=10).mean()

# --- Gradient boosting
from sklearn.ensemble import GradientBoostingRegressor
modelGBR = GradientBoostingRegressor().fit(Xtrain, Ytrain)
modelGBR.score(Xtest, Ytest)
cross_val_score(GradientBoostingRegressor(), Xtrain, Ytrain, cv=10).mean()
# IPython shell escape (not plain Python): installs the pinned xgboost release.
!pip install xgboost==1.3.3
# Captured console output of the command above:
#   Collecting xgboost==1.3.3
#   Downloading xgboost-1.3.3-py3-none-manylinux2010_x86_64.whl (157.5 MB)
#   |████████████████████████████████| 157.5 MB 130 kB/s
#   Requirement already satisfied: scipy in /shared-libs/python3.7/py/lib/python3.7/site-packages (from xgboost==1.3.3) (1.6.1)
#   Requirement already satisfied: numpy in /shared-libs/python3.7/py/lib/python3.7/site-packages (from xgboost==1.3.3) (1.19.5)
#   Installing collected packages: xgboost
#   Successfully installed xgboost-1.3.3
#   WARNING: You are using pip version 20.1.1; however, version 21.0.1 is available.
#   You should consider upgrading via the '/root/venv/bin/python -m pip install --upgrade pip' command.
# Same pattern for both models: fit on the training split, score on the
# held-out split, then average a 10-fold cross-validation of a fresh,
# default-configured estimator on the training split.

# --- XGBoost regressor
from xgboost import XGBRegressor
modelXGB = XGBRegressor()
modelXGB.fit(Xtrain, Ytrain)
modelXGB.score(Xtest, Ytest)
cross_val_score(XGBRegressor(), Xtrain, Ytrain, cv=10).mean()

# --- Extra-trees regressor
from sklearn.ensemble import ExtraTreesRegressor
modelETR = ExtraTreesRegressor()
modelETR.fit(Xtrain, Ytrain)
modelETR.score(Xtest, Ytest)
cross_val_score(ExtraTreesRegressor(), Xtrain, Ytrain, cv=10).mean()
# Grid search over the extra-trees hyperparameters with 5-fold CV.
ETRGrid = GridSearchCV(
    estimator=modelETR,
    param_grid={
        'n_estimators': range(50, 200, 10),
        # None ("use all features") replaces 'auto': for regressors 'auto'
        # meant the same thing, but current scikit-learn releases reject the
        # string, while None is accepted by old and new versions alike.
        'max_features': ['sqrt', 'log2', None],
        'min_samples_leaf': range(1, 3, 1),
        'min_samples_split': range(2, 10, 1),
    },
    cv=5,
)
ETRGrid.fit(Xtrain, Ytrain)
ETRGrid.best_params_

# Build the final model from the hyperparameters the search just selected
# rather than values copied from an earlier run, then fit it on the training
# split and score it on the held-out split.
modelETR1 = ExtraTreesRegressor(**ETRGrid.best_params_)
modelETR1.fit(Xtrain, Ytrain)
modelETR1.score(Xtest, Ytest)