# Core imports for data wrangling, visualization, and interactive widgets.
import sys
import math
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
# import plotly
import plotly.express as px
# NOTE(review): shell-escape install; assumes this runs inside a Jupyter/IPython cell.
!pip install ipywidgets
import ipywidgets as widgets
from ipywidgets import interact
# Use seaborn's whitegrid theme for every subsequent plot.
sns.set_style('whitegrid')
# IPython magics: render figures inline at retina (2x) resolution.
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
Requirement already satisfied: ipywidgets in /root/venv/lib/python3.7/site-packages (7.6.5)
Requirement already satisfied: ipython>=4.0.0 in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from ipywidgets) (7.28.0)
Requirement already satisfied: jupyterlab-widgets>=1.0.0 in /root/venv/lib/python3.7/site-packages (from ipywidgets) (1.0.2)
Requirement already satisfied: nbformat>=4.2.0 in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from ipywidgets) (5.1.3)
Requirement already satisfied: traitlets>=4.3.1 in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from ipywidgets) (4.3.3)
Requirement already satisfied: ipython-genutils~=0.2.0 in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from ipywidgets) (0.2.0)
Requirement already satisfied: widgetsnbextension~=3.5.0 in /root/venv/lib/python3.7/site-packages (from ipywidgets) (3.5.1)
Requirement already satisfied: ipykernel>=4.5.1 in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from ipywidgets) (5.5.5)
Requirement already satisfied: jupyter-client in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from ipykernel>=4.5.1->ipywidgets) (6.1.12)
Requirement already satisfied: tornado>=4.2 in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from ipykernel>=4.5.1->ipywidgets) (6.1)
Requirement already satisfied: decorator in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from ipython>=4.0.0->ipywidgets) (5.1.0)
Requirement already satisfied: prompt-toolkit!=3.0.0,!=3.0.1,<3.1.0,>=2.0.0 in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from ipython>=4.0.0->ipywidgets) (3.0.20)
Requirement already satisfied: jedi>=0.16 in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from ipython>=4.0.0->ipywidgets) (0.17.2)
Requirement already satisfied: pygments in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from ipython>=4.0.0->ipywidgets) (2.10.0)
Requirement already satisfied: pexpect>4.3 in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from ipython>=4.0.0->ipywidgets) (4.8.0)
Requirement already satisfied: setuptools>=18.5 in /root/venv/lib/python3.7/site-packages (from ipython>=4.0.0->ipywidgets) (58.1.0)
Requirement already satisfied: pickleshare in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from ipython>=4.0.0->ipywidgets) (0.7.5)
Requirement already satisfied: backcall in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from ipython>=4.0.0->ipywidgets) (0.2.0)
Requirement already satisfied: matplotlib-inline in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from ipython>=4.0.0->ipywidgets) (0.1.3)
Requirement already satisfied: parso<0.8.0,>=0.7.0 in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from jedi>=0.16->ipython>=4.0.0->ipywidgets) (0.7.1)
Requirement already satisfied: jsonschema!=2.5.0,>=2.4 in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from nbformat>=4.2.0->ipywidgets) (3.2.0)
Requirement already satisfied: jupyter-core in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from nbformat>=4.2.0->ipywidgets) (4.7.1)
Requirement already satisfied: pyrsistent>=0.14.0 in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from jsonschema!=2.5.0,>=2.4->nbformat>=4.2.0->ipywidgets) (0.18.0)
Requirement already satisfied: importlib-metadata in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from jsonschema!=2.5.0,>=2.4->nbformat>=4.2.0->ipywidgets) (4.8.1)
Requirement already satisfied: attrs>=17.4.0 in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from jsonschema!=2.5.0,>=2.4->nbformat>=4.2.0->ipywidgets) (21.2.0)
Requirement already satisfied: six>=1.11.0 in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from jsonschema!=2.5.0,>=2.4->nbformat>=4.2.0->ipywidgets) (1.16.0)
Requirement already satisfied: ptyprocess>=0.5 in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from pexpect>4.3->ipython>=4.0.0->ipywidgets) (0.7.0)
Requirement already satisfied: wcwidth in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from prompt-toolkit!=3.0.0,!=3.0.1,<3.1.0,>=2.0.0->ipython>=4.0.0->ipywidgets) (0.2.5)
Requirement already satisfied: notebook>=4.4.1 in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from widgetsnbextension~=3.5.0->ipywidgets) (6.3.0)
Requirement already satisfied: prometheus-client in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets) (0.11.0)
Requirement already satisfied: jinja2 in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets) (3.0.2)
Requirement already satisfied: terminado>=0.8.3 in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets) (0.12.1)
Requirement already satisfied: argon2-cffi in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets) (21.1.0)
Requirement already satisfied: Send2Trash>=1.5.0 in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets) (1.8.0)
Requirement already satisfied: pyzmq>=17 in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets) (22.3.0)
Requirement already satisfied: nbconvert==6.0.7 in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets) (6.0.7)
Requirement already satisfied: python-dateutil>=2.1 in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from jupyter-client->ipykernel>=4.5.1->ipywidgets) (2.8.2)
Requirement already satisfied: entrypoints>=0.2.2 in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from nbconvert==6.0.7->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets) (0.3)
Requirement already satisfied: defusedxml in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from nbconvert==6.0.7->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets) (0.7.1)
Requirement already satisfied: mistune<2,>=0.8.1 in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from nbconvert==6.0.7->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets) (0.8.4)
Requirement already satisfied: nbclient<0.6.0,>=0.5.0 in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from nbconvert==6.0.7->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets) (0.5.4)
Requirement already satisfied: bleach in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from nbconvert==6.0.7->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets) (4.1.0)
Requirement already satisfied: testpath in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from nbconvert==6.0.7->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets) (0.5.0)
Requirement already satisfied: pandocfilters>=1.4.1 in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from nbconvert==6.0.7->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets) (1.5.0)
Requirement already satisfied: jupyterlab-pygments in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from nbconvert==6.0.7->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets) (0.1.2)
Requirement already satisfied: MarkupSafe>=2.0 in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from jinja2->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets) (2.0.1)
Requirement already satisfied: nest-asyncio in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from nbclient<0.6.0,>=0.5.0->nbconvert==6.0.7->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets) (1.5.1)
Requirement already satisfied: cffi>=1.0.0 in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from argon2-cffi->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets) (1.14.6)
Requirement already satisfied: pycparser in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from cffi>=1.0.0->argon2-cffi->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets) (2.20)
Requirement already satisfied: webencodings in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from bleach->nbconvert==6.0.7->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets) (0.5.1)
Requirement already satisfied: packaging in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from bleach->nbconvert==6.0.7->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets) (21.0)
Requirement already satisfied: zipp>=0.5 in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from importlib-metadata->jsonschema!=2.5.0,>=2.4->nbformat>=4.2.0->ipywidgets) (3.6.0)
Requirement already satisfied: typing-extensions>=3.6.4 in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from importlib-metadata->jsonschema!=2.5.0,>=2.4->nbformat>=4.2.0->ipywidgets) (3.10.0.2)
Requirement already satisfied: pyparsing>=2.0.2 in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from packaging->bleach->nbconvert==6.0.7->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets) (2.4.7)
WARNING: You are using pip version 21.2.4; however, version 21.3.1 is available.
You should consider upgrading via the '/root/venv/bin/python -m pip install --upgrade pip' command.
# Record the runtime environment (Python + key library versions) and the
# current timestamp so the notebook output documents when it was executed.
print(f'Python version: {sys.version}')
print(f'pandas version: {pd.__version__}')
print(f'numpy version: {np.__version__}')
print(f'seaborn version: {sns.__version__}')
pd.Timestamp.now()
# pd.Timestamp.now().strftime('%Y-%m-%d')
Python version: 3.7.12 (default, Oct 12 2021, 03:36:26)
[GCC 8.3.0]
pandas version: 1.2.5
numpy version: 1.19.5
seaborn version: 0.11.2
import warnings
# Silence library deprecation noise (e.g. seaborn's distplot FutureWarning).
warnings.filterwarnings('ignore')
from datetime import datetime
import socket  # stdlib module -- no pip install required

# Log when and where this notebook was last executed.
print(f'last run: {datetime.now()}')
try:
    # Fixed typo: "addtress" -> "address" in the printed message.
    print(f'ip address: {socket.gethostbyname(socket.gethostname())} ({socket.gethostname()})')
except OSError:
    # Hostname may not resolve in sandboxed environments; best-effort only.
    pass
last run: 2021-10-22 16:49:14.833760
ip address: 172.3.49.191 (p-d76f3d72-4f5e-4ae5-9e32-1e304bde2c7d)
def df_unique_value(df):
    """Print the unique values of every object/categorical column in *df*."""
    for col in df.columns:
        dtype = df[col].dtype
        if dtype == 'object' or dtype.name == 'category':
            print(f'{col:10}\n{df[col].unique()}')
            print('-' * 65)
def convert_cols_to_category(df, cols: list):
    """Cast each column named in *cols* to the pandas 'category' dtype.

    Mutates *df* in place and returns it for chaining.
    """
    for name in df[cols].columns:
        df[name] = df[name].astype('category')
    return df
def convert_obj_columns_to_category(df):
    """Cast every object (or already-categorical) column of *df* to 'category'.

    Mutates *df* in place and returns it.
    """
    targets = [c for c in df.columns
               if df[c].dtype == 'object' or df[c].dtype.name == 'category']
    for c in targets:
        df[c] = df[c].astype('category')
    return df
def print_category_columns(df):
    """For each categorical column, print its (code, category-label) pairs."""
    for col in df.columns:
        if df[col].dtype.name != 'category':
            continue
        # print(f'{col}: {df[col].cat.categories}')
        # print(pd.Series(df[col].cat.categories))
        pairs = list(enumerate(df[col].cat.categories))
        print(f'{col:15}: {pairs}')
        print('-' * 60)
def plot_mn(df, cols, n_rows: int = 1, kind: str = 'boxplot', color='salmon'):
    """
    Plot each column of *cols* in an n_rows x n_cols grid.

    Categorical columns are drawn as countplots; numeric columns use the
    style chosen by *kind*: 'boxplot', 'boxen', 'violin', or 'hist'.

    >>> plot_mn(df, ['Calories', 'Fat'], 2, 'hist')
    """
    n = len(cols)
    n_cols = math.ceil(n / n_rows)
    # squeeze=False guarantees a 2-D array of Axes even for a 1x1 grid, so
    # .ravel() is always valid (the original crashed on a single subplot,
    # where plt.subplots returns a bare Axes with no .ravel()).
    fig, axes = plt.subplots(n_rows, n_cols,
                             figsize=(n_cols * 3, n_rows * 3.5),
                             squeeze=False)
    axes = axes.ravel()
    fig.tight_layout()
    kind = kind.lower()  # normalize once instead of on every branch
    for i, c in enumerate(cols):
        if df[c].dtype.name == 'category':
            sns.countplot(data=df, x=c, ax=axes[i])
        elif kind == 'boxplot':
            sns.boxplot(data=df[[c]], ax=axes[i], color=color)
        elif kind == 'boxen':
            sns.boxenplot(data=df[[c]], ax=axes[i], color=color)
        elif kind == 'violin':
            sns.violinplot(data=df[[c]], ax=axes[i], color=color)
        elif kind == 'hist':
            # NOTE: distplot is deprecated in seaborn 0.11; kept for
            # visual parity (warnings are filtered earlier in the notebook).
            sns.distplot(df[c], hist=True, kde=False, ax=axes[i], color=color)
    # Hide unused trailing axes when n_rows * n_cols > len(cols).
    for j in range(n, len(axes)):
        axes[j].set_visible(False)
# Download each manufacturer's CSV, tag its rows with the maker name,
# and stack everything into a single DataFrame.
mfg = ['bmw', 'vw', 'ford', 'toyota', 'hyundi']
dfs = []
for brand in mfg:
    url = f'https://github.com/prasertcbs/basic-dataset/raw/master/q2/{brand}.csv'
    print(url)
    part = pd.read_csv(url, skipinitialspace=True)
    part['mfg'] = brand
    dfs.append(part)
df = pd.concat(dfs)
df
https://github.com/prasertcbs/basic-dataset/raw/master/q2/bmw.csv
https://github.com/prasertcbs/basic-dataset/raw/master/q2/vw.csv
https://github.com/prasertcbs/basic-dataset/raw/master/q2/ford.csv
https://github.com/prasertcbs/basic-dataset/raw/master/q2/toyota.csv
https://github.com/prasertcbs/basic-dataset/raw/master/q2/hyundi.csv
# Derive metric-unit columns: mileage (miles) -> kilometres,
# mpg (miles per imperial gallon) -> kilometres per litre.
df.columns
df['mileage_km'] = df['mileage']*1.60934
df['kml'] = df['mpg']*0.4251437075
df
df.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 7500 entries, 0 to 1499
Data columns (total 11 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 model 7380 non-null object
1 year 7380 non-null float64
2 price 7500 non-null int64
3 transmission 7440 non-null object
4 mileage 7320 non-null float64
5 fuelType 7320 non-null object
6 mpg 7440 non-null float64
7 engineSize 7320 non-null float64
8 mfg 7500 non-null object
9 mileage_km 7320 non-null float64
10 kml 7440 non-null float64
dtypes: float64(6), int64(1), object(4)
memory usage: 703.1+ KB
# Clean-up: drop missing rows and duplicates, remove the now-redundant
# imperial-unit columns, and shrink text columns to categories.
df = df.dropna().reset_index(drop=True).copy()
df
df.drop_duplicates(inplace=True)
df.drop(columns=['mpg', 'mileage'], inplace=True)
df.isna().sum()
df = convert_obj_columns_to_category(df)
# DataFrame.round(2) is the idiomatic, vectorized form of the original
# apply(lambda x: round(number=x, ndigits=2)).
df[['mileage_km', 'kml']] = df[['mileage_km', 'kml']].round(2)
df
df.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 6606 entries, 0 to 6632
Data columns (total 9 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 model 6606 non-null category
1 year 6606 non-null float64
2 price 6606 non-null int64
3 transmission 6606 non-null category
4 fuelType 6606 non-null category
5 engineSize 6606 non-null float64
6 mfg 6606 non-null category
7 mileage_km 6606 non-null float64
8 kml 6606 non-null float64
dtypes: category(4), float64(4), int64(1)
memory usage: 338.8 KB
# Top-10 most fuel-efficient cars, distribution plots for every column,
# and the same top-10 with engineSize highlighted for contrast.
df.nlargest(10, 'kml').style.background_gradient(cmap='Blues',subset=["kml"])
plot_mn(df, df.columns, 3, 'boxen')
df.sort_values('kml', ascending=False).head(10).style.background_gradient(cmap='Blues',subset=["kml"])\
.background_gradient(cmap='Reds',subset=["engineSize"])
# Keep only Petrol/Diesel rows: the rare fuel types would skew the model.
# One isin() drop replaces the original three separate drop calls.
df.drop(df[df['fuelType'].isin(['Other', 'Hybrid', 'Electric'])].index,
        inplace=True)
fuelType_count = df['fuelType'].value_counts()
pd.DataFrame(fuelType_count).style.background_gradient(cmap='Greens')
# ax = sns.countplot(df.fuelType, palette="ch:.1", order=['Petrol','Diesel'])
# import plotly.io as pio
px.histogram(df.fuelType, x='fuelType', color='fuelType', title='Count_Plot_fuelType')
# pio.write_json(fig, 'countplot.plotly')
# fig_styled = pio.read_json('countplot.plotly')
# fig_styled
# Bug fix: drop must be the bool True (the original passed the string 'True',
# which only worked by being truthy) and the result must be assigned back --
# the original call discarded its return value, so the index was never reset.
df = df.reset_index(drop=True)
df.columns
# Columns considered for the correlation matrix (non-numeric columns are
# silently ignored by DataFrame.corr()).
cols=['model', 'year', 'price', 'transmission', 'fuelType', 'engineSize',
'mfg', 'mileage_km', 'kml']
dcorr = df[cols].corr()
# dcorr
# Mask the upper triangle so each correlation is shown only once.
mask = np.zeros_like(dcorr)
# mask.shape
mask[np.triu_indices_from(mask)] = True
fig, ax = plt.subplots(figsize=(10,8))
sns.heatmap(dcorr, cmap=sns.diverging_palette(10, 145, n=100),
vmin=-1, vmax=1, center=0, linewidths=1, annot=True, mask=mask, ax=ax).set_title("Correlation\nHeatmap", fontsize=22,fontweight="bold");
# Feature/target split for the price-prediction model.
feature_cols=['year','transmission', 'fuelType', 'engineSize',
'mfg', 'mileage_km', 'kml']
target_col='price'
X=df[feature_cols]
y=df[target_col]
from sklearn.model_selection import train_test_split,cross_val_score
# Hold out 30% for evaluation; fixed random_state for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=1)
!pip install flaml
from flaml import AutoML
automl = AutoML()
Requirement already satisfied: flaml in /root/venv/lib/python3.7/site-packages (0.6.9)
Requirement already satisfied: scipy>=1.4.1 in /shared-libs/python3.7/py/lib/python3.7/site-packages (from flaml) (1.7.1)
Requirement already satisfied: lightgbm>=2.3.1 in /root/venv/lib/python3.7/site-packages (from flaml) (3.3.0)
Requirement already satisfied: scikit-learn>=0.24 in /shared-libs/python3.7/py/lib/python3.7/site-packages (from flaml) (1.0)
Requirement already satisfied: NumPy>=1.16.2 in /shared-libs/python3.7/py/lib/python3.7/site-packages (from flaml) (1.19.5)
Requirement already satisfied: xgboost<=1.3.3,>=0.90 in /root/venv/lib/python3.7/site-packages (from flaml) (1.3.3)
Requirement already satisfied: pandas>=1.1.4 in /shared-libs/python3.7/py/lib/python3.7/site-packages (from flaml) (1.2.5)
Requirement already satisfied: wheel in /root/venv/lib/python3.7/site-packages (from lightgbm>=2.3.1->flaml) (0.37.0)
Requirement already satisfied: pytz>=2017.3 in /shared-libs/python3.7/py/lib/python3.7/site-packages (from pandas>=1.1.4->flaml) (2021.3)
Requirement already satisfied: python-dateutil>=2.7.3 in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from pandas>=1.1.4->flaml) (2.8.2)
Requirement already satisfied: six>=1.5 in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from python-dateutil>=2.7.3->pandas>=1.1.4->flaml) (1.16.0)
Requirement already satisfied: threadpoolctl>=2.0.0 in /shared-libs/python3.7/py/lib/python3.7/site-packages (from scikit-learn>=0.24->flaml) (3.0.0)
Requirement already satisfied: joblib>=0.11 in /shared-libs/python3.7/py/lib/python3.7/site-packages (from scikit-learn>=0.24->flaml) (1.1.0)
WARNING: You are using pip version 21.2.4; however, version 21.3.1 is available.
You should consider upgrading via the '/root/venv/bin/python -m pip install --upgrade pip' command.
# FLAML AutoML configuration: tune LightGBM for up to 100 s, optimizing R^2.
settings = {
"time_budget": 100, # total running time in seconds
"metric": 'r2', # primary metrics for regression can be chosen from: ['mae','mse','r2']
"estimator_list": ['lgbm'], # list of ML learners; we tune lightgbm in this example
"task": 'regression', # task type
"log_file_name": 'UK_used_car_price.log', # flaml log file
}
'''The main flaml automl API'''
automl.fit(X_train=X_train, y_train=y_train, **settings)
[flaml.automl: 10-22 16:49:35] {1463} INFO - Data split method: uniform
[flaml.automl: 10-22 16:49:35] {1467} INFO - Evaluation method: cv
[flaml.automl: 10-22 16:49:35] {1515} INFO - Minimizing error metric: 1-r2
[flaml.automl: 10-22 16:49:35] {1552} INFO - List of ML learners in AutoML Run: ['lgbm']
[flaml.automl: 10-22 16:49:35] {1793} INFO - iteration 0, current learner lgbm
[flaml.automl: 10-22 16:49:36] {1911} INFO - Estimated sufficient time budget=2025s. Estimated necessary time budget=2s.
[flaml.automl: 10-22 16:49:36] {1987} INFO - at 0.3s, estimator lgbm's best error=0.6505, best estimator lgbm's best error=0.6505
[flaml.automl: 10-22 16:49:36] {1793} INFO - iteration 1, current learner lgbm
[flaml.automl: 10-22 16:49:36] {1987} INFO - at 0.5s, estimator lgbm's best error=0.6505, best estimator lgbm's best error=0.6505
[flaml.automl: 10-22 16:49:36] {1793} INFO - iteration 2, current learner lgbm
[flaml.automl: 10-22 16:49:36] {1987} INFO - at 0.7s, estimator lgbm's best error=0.3718, best estimator lgbm's best error=0.3718
[flaml.automl: 10-22 16:49:36] {1793} INFO - iteration 3, current learner lgbm
[flaml.automl: 10-22 16:49:36] {1987} INFO - at 1.0s, estimator lgbm's best error=0.1586, best estimator lgbm's best error=0.1586
[flaml.automl: 10-22 16:49:36] {1793} INFO - iteration 4, current learner lgbm
[flaml.automl: 10-22 16:49:37] {1987} INFO - at 1.2s, estimator lgbm's best error=0.1586, best estimator lgbm's best error=0.1586
[flaml.automl: 10-22 16:49:37] {1793} INFO - iteration 5, current learner lgbm
[flaml.automl: 10-22 16:49:37] {1987} INFO - at 1.4s, estimator lgbm's best error=0.1483, best estimator lgbm's best error=0.1483
[flaml.automl: 10-22 16:49:37] {1793} INFO - iteration 6, current learner lgbm
[flaml.automl: 10-22 16:49:37] {1987} INFO - at 1.7s, estimator lgbm's best error=0.1483, best estimator lgbm's best error=0.1483
[flaml.automl: 10-22 16:49:37] {1793} INFO - iteration 7, current learner lgbm
[flaml.automl: 10-22 16:49:37] {1987} INFO - at 1.9s, estimator lgbm's best error=0.1483, best estimator lgbm's best error=0.1483
[flaml.automl: 10-22 16:49:37] {1793} INFO - iteration 8, current learner lgbm
[flaml.automl: 10-22 16:49:38] {1987} INFO - at 2.2s, estimator lgbm's best error=0.1258, best estimator lgbm's best error=0.1258
[flaml.automl: 10-22 16:49:38] {1793} INFO - iteration 9, current learner lgbm
[flaml.automl: 10-22 16:49:38] {1987} INFO - at 2.4s, estimator lgbm's best error=0.1258, best estimator lgbm's best error=0.1258
[flaml.automl: 10-22 16:49:38] {1793} INFO - iteration 10, current learner lgbm
[flaml.automl: 10-22 16:49:38] {1987} INFO - at 2.9s, estimator lgbm's best error=0.0875, best estimator lgbm's best error=0.0875
[flaml.automl: 10-22 16:49:38] {1793} INFO - iteration 11, current learner lgbm
[flaml.automl: 10-22 16:49:39] {1987} INFO - at 3.4s, estimator lgbm's best error=0.0875, best estimator lgbm's best error=0.0875
[flaml.automl: 10-22 16:49:39] {1793} INFO - iteration 12, current learner lgbm
[flaml.automl: 10-22 16:49:39] {1987} INFO - at 3.8s, estimator lgbm's best error=0.0875, best estimator lgbm's best error=0.0875
[flaml.automl: 10-22 16:49:39] {1793} INFO - iteration 13, current learner lgbm
[flaml.automl: 10-22 16:49:40] {1987} INFO - at 4.3s, estimator lgbm's best error=0.0875, best estimator lgbm's best error=0.0875
[flaml.automl: 10-22 16:49:40] {1793} INFO - iteration 14, current learner lgbm
[flaml.automl: 10-22 16:49:40] {1987} INFO - at 4.9s, estimator lgbm's best error=0.0875, best estimator lgbm's best error=0.0875
[flaml.automl: 10-22 16:49:40] {1793} INFO - iteration 15, current learner lgbm
[flaml.automl: 10-22 16:49:41] {1987} INFO - at 5.5s, estimator lgbm's best error=0.0875, best estimator lgbm's best error=0.0875
[flaml.automl: 10-22 16:49:41] {1793} INFO - iteration 16, current learner lgbm
[flaml.automl: 10-22 16:49:41] {1987} INFO - at 5.9s, estimator lgbm's best error=0.0875, best estimator lgbm's best error=0.0875
[flaml.automl: 10-22 16:49:41] {1793} INFO - iteration 17, current learner lgbm
[flaml.automl: 10-22 16:49:42] {1987} INFO - at 6.6s, estimator lgbm's best error=0.0841, best estimator lgbm's best error=0.0841
[flaml.automl: 10-22 16:49:42] {1793} INFO - iteration 18, current learner lgbm
[flaml.automl: 10-22 16:49:42] {1987} INFO - at 7.0s, estimator lgbm's best error=0.0841, best estimator lgbm's best error=0.0841
[flaml.automl: 10-22 16:49:42] {1793} INFO - iteration 19, current learner lgbm
[flaml.automl: 10-22 16:49:43] {1987} INFO - at 7.5s, estimator lgbm's best error=0.0841, best estimator lgbm's best error=0.0841
[flaml.automl: 10-22 16:49:43] {1793} INFO - iteration 20, current learner lgbm
[flaml.automl: 10-22 16:49:44] {1987} INFO - at 8.8s, estimator lgbm's best error=0.0841, best estimator lgbm's best error=0.0841
[flaml.automl: 10-22 16:49:44] {1793} INFO - iteration 21, current learner lgbm
[flaml.automl: 10-22 16:49:45] {1987} INFO - at 9.5s, estimator lgbm's best error=0.0838, best estimator lgbm's best error=0.0838
[flaml.automl: 10-22 16:49:45] {1793} INFO - iteration 22, current learner lgbm
[flaml.automl: 10-22 16:49:46] {1987} INFO - at 10.2s, estimator lgbm's best error=0.0817, best estimator lgbm's best error=0.0817
[flaml.automl: 10-22 16:49:46] {1793} INFO - iteration 23, current learner lgbm
[flaml.automl: 10-22 16:49:46] {1987} INFO - at 10.9s, estimator lgbm's best error=0.0817, best estimator lgbm's best error=0.0817
[flaml.automl: 10-22 16:49:46] {1793} INFO - iteration 24, current learner lgbm
[flaml.automl: 10-22 16:49:47] {1987} INFO - at 11.7s, estimator lgbm's best error=0.0817, best estimator lgbm's best error=0.0817
[flaml.automl: 10-22 16:49:47] {1793} INFO - iteration 25, current learner lgbm
[flaml.automl: 10-22 16:49:48] {1987} INFO - at 13.1s, estimator lgbm's best error=0.0817, best estimator lgbm's best error=0.0817
[flaml.automl: 10-22 16:49:48] {1793} INFO - iteration 26, current learner lgbm
[flaml.automl: 10-22 16:49:49] {1987} INFO - at 13.5s, estimator lgbm's best error=0.0817, best estimator lgbm's best error=0.0817
[flaml.automl: 10-22 16:49:49] {1793} INFO - iteration 27, current learner lgbm
[flaml.automl: 10-22 16:49:50] {1987} INFO - at 14.6s, estimator lgbm's best error=0.0817, best estimator lgbm's best error=0.0817
[flaml.automl: 10-22 16:49:50] {1793} INFO - iteration 28, current learner lgbm
[flaml.automl: 10-22 16:49:50] {1987} INFO - at 15.1s, estimator lgbm's best error=0.0817, best estimator lgbm's best error=0.0817
[flaml.automl: 10-22 16:49:50] {1793} INFO - iteration 29, current learner lgbm
[flaml.automl: 10-22 16:49:51] {1987} INFO - at 15.4s, estimator lgbm's best error=0.0817, best estimator lgbm's best error=0.0817
[flaml.automl: 10-22 16:49:51] {1793} INFO - iteration 30, current learner lgbm
[flaml.automl: 10-22 16:49:52] {1987} INFO - at 17.0s, estimator lgbm's best error=0.0817, best estimator lgbm's best error=0.0817
[flaml.automl: 10-22 16:49:52] {1793} INFO - iteration 31, current learner lgbm
[flaml.automl: 10-22 16:49:53] {1987} INFO - at 17.6s, estimator lgbm's best error=0.0817, best estimator lgbm's best error=0.0817
[flaml.automl: 10-22 16:49:53] {1793} INFO - iteration 32, current learner lgbm
[flaml.automl: 10-22 16:49:54] {1987} INFO - at 18.5s, estimator lgbm's best error=0.0817, best estimator lgbm's best error=0.0817
[flaml.automl: 10-22 16:49:54] {1793} INFO - iteration 33, current learner lgbm
[flaml.automl: 10-22 16:49:54] {1987} INFO - at 19.1s, estimator lgbm's best error=0.0817, best estimator lgbm's best error=0.0817
[flaml.automl: 10-22 16:49:54] {1793} INFO - iteration 34, current learner lgbm
[flaml.automl: 10-22 16:49:55] {1987} INFO - at 19.7s, estimator lgbm's best error=0.0817, best estimator lgbm's best error=0.0817
[flaml.automl: 10-22 16:49:55] {1793} INFO - iteration 35, current learner lgbm
[flaml.automl: 10-22 16:49:57] {1987} INFO - at 21.2s, estimator lgbm's best error=0.0810, best estimator lgbm's best error=0.0810
[flaml.automl: 10-22 16:49:57] {1793} INFO - iteration 36, current learner lgbm
[flaml.automl: 10-22 16:49:57] {1987} INFO - at 21.8s, estimator lgbm's best error=0.0810, best estimator lgbm's best error=0.0810
[flaml.automl: 10-22 16:49:57] {1793} INFO - iteration 37, current learner lgbm
[flaml.automl: 10-22 16:50:00] {1987} INFO - at 24.9s, estimator lgbm's best error=0.0810, best estimator lgbm's best error=0.0810
[flaml.automl: 10-22 16:50:00] {1793} INFO - iteration 38, current learner lgbm
[flaml.automl: 10-22 16:50:02] {1987} INFO - at 26.4s, estimator lgbm's best error=0.0809, best estimator lgbm's best error=0.0809
[flaml.automl: 10-22 16:50:02] {1793} INFO - iteration 39, current learner lgbm
[flaml.automl: 10-22 16:50:10] {1987} INFO - at 34.9s, estimator lgbm's best error=0.0809, best estimator lgbm's best error=0.0809
[flaml.automl: 10-22 16:50:10] {1793} INFO - iteration 40, current learner lgbm
[flaml.automl: 10-22 16:50:11] {1987} INFO - at 35.4s, estimator lgbm's best error=0.0809, best estimator lgbm's best error=0.0809
[flaml.automl: 10-22 16:50:11] {1793} INFO - iteration 41, current learner lgbm
[flaml.automl: 10-22 16:50:11] {1987} INFO - at 35.9s, estimator lgbm's best error=0.0809, best estimator lgbm's best error=0.0809
[flaml.automl: 10-22 16:50:11] {1793} INFO - iteration 42, current learner lgbm
[flaml.automl: 10-22 16:50:19] {1987} INFO - at 43.6s, estimator lgbm's best error=0.0809, best estimator lgbm's best error=0.0809
[flaml.automl: 10-22 16:50:19] {1793} INFO - iteration 43, current learner lgbm
[flaml.automl: 10-22 16:50:23] {1987} INFO - at 47.6s, estimator lgbm's best error=0.0809, best estimator lgbm's best error=0.0809
[flaml.automl: 10-22 16:50:23] {1793} INFO - iteration 44, current learner lgbm
[flaml.automl: 10-22 16:50:24] {1987} INFO - at 48.3s, estimator lgbm's best error=0.0800, best estimator lgbm's best error=0.0800
[flaml.automl: 10-22 16:50:24] {1793} INFO - iteration 45, current learner lgbm
[flaml.automl: 10-22 16:50:24] {1987} INFO - at 48.7s, estimator lgbm's best error=0.0800, best estimator lgbm's best error=0.0800
[flaml.automl: 10-22 16:50:24] {1793} INFO - iteration 46, current learner lgbm
[flaml.automl: 10-22 16:50:27] {1987} INFO - at 51.9s, estimator lgbm's best error=0.0800, best estimator lgbm's best error=0.0800
[flaml.automl: 10-22 16:50:27] {1793} INFO - iteration 47, current learner lgbm
[flaml.automl: 10-22 16:50:28] {1987} INFO - at 52.3s, estimator lgbm's best error=0.0800, best estimator lgbm's best error=0.0800
[flaml.automl: 10-22 16:50:28] {1793} INFO - iteration 48, current learner lgbm
[flaml.automl: 10-22 16:50:31] {1987} INFO - at 55.4s, estimator lgbm's best error=0.0800, best estimator lgbm's best error=0.0800
[flaml.automl: 10-22 16:50:31] {1793} INFO - iteration 49, current learner lgbm
[flaml.automl: 10-22 16:50:31] {1987} INFO - at 55.7s, estimator lgbm's best error=0.0800, best estimator lgbm's best error=0.0800
[flaml.automl: 10-22 16:50:31] {1793} INFO - iteration 50, current learner lgbm
[flaml.automl: 10-22 16:50:35] {1987} INFO - at 60.1s, estimator lgbm's best error=0.0800, best estimator lgbm's best error=0.0800
[flaml.automl: 10-22 16:50:35] {1793} INFO - iteration 51, current learner lgbm
[flaml.automl: 10-22 16:50:38] {1987} INFO - at 62.4s, estimator lgbm's best error=0.0776, best estimator lgbm's best error=0.0776
[flaml.automl: 10-22 16:50:38] {1793} INFO - iteration 52, current learner lgbm
[flaml.automl: 10-22 16:50:46] {1987} INFO - at 70.2s, estimator lgbm's best error=0.0776, best estimator lgbm's best error=0.0776
[flaml.automl: 10-22 16:50:46] {1793} INFO - iteration 53, current learner lgbm
[flaml.automl: 10-22 16:50:46] {1987} INFO - at 70.9s, estimator lgbm's best error=0.0776, best estimator lgbm's best error=0.0776
[flaml.automl: 10-22 16:50:46] {1793} INFO - iteration 54, current learner lgbm
[flaml.automl: 10-22 16:50:48] {1987} INFO - at 73.0s, estimator lgbm's best error=0.0776, best estimator lgbm's best error=0.0776
[flaml.automl: 10-22 16:50:48] {1793} INFO - iteration 55, current learner lgbm
[flaml.automl: 10-22 16:50:51] {1987} INFO - at 76.1s, estimator lgbm's best error=0.0776, best estimator lgbm's best error=0.0776
[flaml.automl: 10-22 16:50:51] {1793} INFO - iteration 56, current learner lgbm
[flaml.automl: 10-22 16:50:54] {1987} INFO - at 78.6s, estimator lgbm's best error=0.0776, best estimator lgbm's best error=0.0776
[flaml.automl: 10-22 16:50:54] {1793} INFO - iteration 57, current learner lgbm
[flaml.automl: 10-22 16:50:56] {1987} INFO - at 80.9s, estimator lgbm's best error=0.0776, best estimator lgbm's best error=0.0776
[flaml.automl: 10-22 16:50:56] {1793} INFO - iteration 58, current learner lgbm
[flaml.automl: 10-22 16:50:57] {1987} INFO - at 82.0s, estimator lgbm's best error=0.0776, best estimator lgbm's best error=0.0776
[flaml.automl: 10-22 16:50:57] {1793} INFO - iteration 59, current learner lgbm
[flaml.automl: 10-22 16:51:01] {1987} INFO - at 85.9s, estimator lgbm's best error=0.0776, best estimator lgbm's best error=0.0776
[flaml.automl: 10-22 16:51:01] {1793} INFO - iteration 60, current learner lgbm
[flaml.automl: 10-22 16:51:10] {1987} INFO - at 94.3s, estimator lgbm's best error=0.0776, best estimator lgbm's best error=0.0776
[flaml.automl: 10-22 16:51:10] {1793} INFO - iteration 61, current learner lgbm
[flaml.automl: 10-22 16:51:10] {1987} INFO - at 94.7s, estimator lgbm's best error=0.0776, best estimator lgbm's best error=0.0776
[flaml.automl: 10-22 16:51:10] {1793} INFO - iteration 62, current learner lgbm
[flaml.automl: 10-22 16:51:13] {1987} INFO - at 97.5s, estimator lgbm's best error=0.0776, best estimator lgbm's best error=0.0776
[flaml.automl: 10-22 16:51:13] {1793} INFO - iteration 63, current learner lgbm
[flaml.automl: 10-22 16:51:13] {1987} INFO - at 98.0s, estimator lgbm's best error=0.0776, best estimator lgbm's best error=0.0776
[flaml.automl: 10-22 16:51:13] {2087} INFO - selected model: LGBMRegressor(colsample_bytree=0.9387926402887784,
learning_rate=0.07462243384581825, max_bin=511,
min_child_samples=3, n_estimators=545, num_leaves=18,
reg_alpha=0.10333698843991193, reg_lambda=0.004913466630336639,
verbose=-1)
[flaml.automl: 10-22 16:51:14] {2151} INFO - retrain lgbm for 0.5s
[flaml.automl: 10-22 16:51:14] {2155} INFO - retrained model: LGBMRegressor(colsample_bytree=0.9387926402887784,
learning_rate=0.07462243384581825, max_bin=511,
min_child_samples=3, n_estimators=545, num_leaves=18,
reg_alpha=0.10333698843991193, reg_lambda=0.004913466630336639,
verbose=-1)
[flaml.automl: 10-22 16:51:14] {1576} INFO - fit succeeded
[flaml.automl: 10-22 16:51:14] {1578} INFO - Time taken to find the best model: 62.44079804420471
# Inspect the best estimator found, then predict on the held-out test set.
automl.model
''' compute predictions of testing dataset '''
y_pred = automl.predict(X_test)
print('Predicted labels', y_pred)
print('True labels', y_test)
Predicted labels [ 7666.86109386 15330.85711618 8270.28789868 ... 8673.77484936
12713.23316695 17874.71304822]
True labels 1989 8311
985 15991
3763 8998
237 18148
4926 11490
...
2690 6390
946 18000
5423 9689
5177 11975
5949 17000
Name: price, Length: 1809, dtype: int64
''' compute different metric values on testing dataset'''
from flaml.ml import sklearn_metric_loss_score
# sklearn_metric_loss_score returns a *loss*, so R^2 is reported as 1 - loss.
print('r2', '=', 1 - sklearn_metric_loss_score('r2', y_pred, y_test))
print('mse', '=', sklearn_metric_loss_score('mse', y_pred, y_test))
print('mae', '=', sklearn_metric_loss_score('mae', y_pred, y_test))
r2 = 0.8911677265464114
mse = 8815374.800960746
mae = 1460.6715680772309
from flaml.data import get_output_from_log
# Replay the FLAML search log: per-iteration wall-clock times, validation
# losses (current and best-so-far), and the hyper-parameter configs tried.
# NOTE(review): assumes `settings['log_file_name']` was defined in the cell
# that launched automl.fit() — confirm against the earlier cells.
time_history, best_valid_loss_history, valid_loss_history, config_history, train_loss_history = \
get_output_from_log(filename=settings['log_file_name'], time_budget=60)
for config in config_history:
print(config)
{'Current Learner': 'lgbm', 'Current Sample': 4221, 'Current Hyper-parameters': {'n_estimators': 4, 'num_leaves': 4, 'min_child_samples': 20, 'learning_rate': 0.09999999999999995, 'log_max_bin': 8, 'colsample_bytree': 1.0, 'reg_alpha': 0.0009765625, 'reg_lambda': 1.0}, 'Best Learner': 'lgbm', 'Best Hyper-parameters': {'n_estimators': 4, 'num_leaves': 4, 'min_child_samples': 20, 'learning_rate': 0.09999999999999995, 'log_max_bin': 8, 'colsample_bytree': 1.0, 'reg_alpha': 0.0009765625, 'reg_lambda': 1.0}}
{'Current Learner': 'lgbm', 'Current Sample': 4221, 'Current Hyper-parameters': {'n_estimators': 4, 'num_leaves': 4, 'min_child_samples': 12, 'learning_rate': 0.26770501231052046, 'log_max_bin': 7, 'colsample_bytree': 1.0, 'reg_alpha': 0.001348364934537134, 'reg_lambda': 1.4442580148221913}, 'Best Learner': 'lgbm', 'Best Hyper-parameters': {'n_estimators': 4, 'num_leaves': 4, 'min_child_samples': 12, 'learning_rate': 0.26770501231052046, 'log_max_bin': 7, 'colsample_bytree': 1.0, 'reg_alpha': 0.001348364934537134, 'reg_lambda': 1.4442580148221913}}
{'Current Learner': 'lgbm', 'Current Sample': 4221, 'Current Hyper-parameters': {'n_estimators': 9, 'num_leaves': 4, 'min_child_samples': 9, 'learning_rate': 0.7260594590615893, 'log_max_bin': 9, 'colsample_bytree': 0.9285002286474459, 'reg_alpha': 0.0036840681931986645, 'reg_lambda': 0.7532480505730402}, 'Best Learner': 'lgbm', 'Best Hyper-parameters': {'n_estimators': 9, 'num_leaves': 4, 'min_child_samples': 9, 'learning_rate': 0.7260594590615893, 'log_max_bin': 9, 'colsample_bytree': 0.9285002286474459, 'reg_alpha': 0.0036840681931986645, 'reg_lambda': 0.7532480505730402}}
{'Current Learner': 'lgbm', 'Current Sample': 4221, 'Current Hyper-parameters': {'n_estimators': 10, 'num_leaves': 5, 'min_child_samples': 5, 'learning_rate': 0.7590459488450945, 'log_max_bin': 8, 'colsample_bytree': 0.8304072431299575, 'reg_alpha': 0.001951378031519758, 'reg_lambda': 0.04792552866398477}, 'Best Learner': 'lgbm', 'Best Hyper-parameters': {'n_estimators': 10, 'num_leaves': 5, 'min_child_samples': 5, 'learning_rate': 0.7590459488450945, 'log_max_bin': 8, 'colsample_bytree': 0.8304072431299575, 'reg_alpha': 0.001951378031519758, 'reg_lambda': 0.04792552866398477}}
{'Current Learner': 'lgbm', 'Current Sample': 4221, 'Current Hyper-parameters': {'n_estimators': 28, 'num_leaves': 4, 'min_child_samples': 4, 'learning_rate': 0.41929025492645006, 'log_max_bin': 8, 'colsample_bytree': 0.7610534336273627, 'reg_alpha': 0.0009765625, 'reg_lambda': 0.009280655005879927}, 'Best Learner': 'lgbm', 'Best Hyper-parameters': {'n_estimators': 28, 'num_leaves': 4, 'min_child_samples': 4, 'learning_rate': 0.41929025492645006, 'log_max_bin': 8, 'colsample_bytree': 0.7610534336273627, 'reg_alpha': 0.0009765625, 'reg_lambda': 0.009280655005879927}}
{'Current Learner': 'lgbm', 'Current Sample': 4221, 'Current Hyper-parameters': {'n_estimators': 75, 'num_leaves': 14, 'min_child_samples': 3, 'learning_rate': 0.17402065726724145, 'log_max_bin': 8, 'colsample_bytree': 0.6649148062238498, 'reg_alpha': 0.0009765625, 'reg_lambda': 0.006761362450996487}, 'Best Learner': 'lgbm', 'Best Hyper-parameters': {'n_estimators': 75, 'num_leaves': 14, 'min_child_samples': 3, 'learning_rate': 0.17402065726724145, 'log_max_bin': 8, 'colsample_bytree': 0.6649148062238498, 'reg_alpha': 0.0009765625, 'reg_lambda': 0.006761362450996487}}
{'Current Learner': 'lgbm', 'Current Sample': 4221, 'Current Hyper-parameters': {'n_estimators': 49, 'num_leaves': 43, 'min_child_samples': 2, 'learning_rate': 0.1530612501227463, 'log_max_bin': 10, 'colsample_bytree': 0.7463308378914483, 'reg_alpha': 0.0009765625, 'reg_lambda': 0.012698515198279517}, 'Best Learner': 'lgbm', 'Best Hyper-parameters': {'n_estimators': 49, 'num_leaves': 43, 'min_child_samples': 2, 'learning_rate': 0.1530612501227463, 'log_max_bin': 10, 'colsample_bytree': 0.7463308378914483, 'reg_alpha': 0.0009765625, 'reg_lambda': 0.012698515198279517}}
{'Current Learner': 'lgbm', 'Current Sample': 4221, 'Current Hyper-parameters': {'n_estimators': 84, 'num_leaves': 24, 'min_child_samples': 2, 'learning_rate': 0.18908812295671118, 'log_max_bin': 10, 'colsample_bytree': 0.7965188503204358, 'reg_alpha': 0.004577823970660193, 'reg_lambda': 0.02722880765786953}, 'Best Learner': 'lgbm', 'Best Hyper-parameters': {'n_estimators': 84, 'num_leaves': 24, 'min_child_samples': 2, 'learning_rate': 0.18908812295671118, 'log_max_bin': 10, 'colsample_bytree': 0.7965188503204358, 'reg_alpha': 0.004577823970660193, 'reg_lambda': 0.02722880765786953}}
{'Current Learner': 'lgbm', 'Current Sample': 4221, 'Current Hyper-parameters': {'n_estimators': 49, 'num_leaves': 44, 'min_child_samples': 5, 'learning_rate': 0.1530612501227464, 'log_max_bin': 9, 'colsample_bytree': 0.7463308378914483, 'reg_alpha': 0.0009765625000000002, 'reg_lambda': 0.012698515198279517}, 'Best Learner': 'lgbm', 'Best Hyper-parameters': {'n_estimators': 49, 'num_leaves': 44, 'min_child_samples': 5, 'learning_rate': 0.1530612501227464, 'log_max_bin': 9, 'colsample_bytree': 0.7463308378914483, 'reg_alpha': 0.0009765625000000002, 'reg_lambda': 0.012698515198279517}}
{'Current Learner': 'lgbm', 'Current Sample': 4221, 'Current Hyper-parameters': {'n_estimators': 186, 'num_leaves': 54, 'min_child_samples': 4, 'learning_rate': 0.23158746785935408, 'log_max_bin': 8, 'colsample_bytree': 0.7577916169254649, 'reg_alpha': 0.0009765625, 'reg_lambda': 0.03188419400042777}, 'Best Learner': 'lgbm', 'Best Hyper-parameters': {'n_estimators': 186, 'num_leaves': 54, 'min_child_samples': 4, 'learning_rate': 0.23158746785935408, 'log_max_bin': 8, 'colsample_bytree': 0.7577916169254649, 'reg_alpha': 0.0009765625, 'reg_lambda': 0.03188419400042777}}
{'Current Learner': 'lgbm', 'Current Sample': 4221, 'Current Hyper-parameters': {'n_estimators': 342, 'num_leaves': 49, 'min_child_samples': 7, 'learning_rate': 0.1180103887802835, 'log_max_bin': 9, 'colsample_bytree': 0.7407378789573081, 'reg_alpha': 0.0009765625, 'reg_lambda': 0.11929523168059925}, 'Best Learner': 'lgbm', 'Best Hyper-parameters': {'n_estimators': 342, 'num_leaves': 49, 'min_child_samples': 7, 'learning_rate': 0.1180103887802835, 'log_max_bin': 9, 'colsample_bytree': 0.7407378789573081, 'reg_alpha': 0.0009765625, 'reg_lambda': 0.11929523168059925}}
{'Current Learner': 'lgbm', 'Current Sample': 4221, 'Current Hyper-parameters': {'n_estimators': 186, 'num_leaves': 54, 'min_child_samples': 4, 'learning_rate': 0.23158746785935408, 'log_max_bin': 8, 'colsample_bytree': 0.7577916169254649, 'reg_alpha': 0.008948061409181867, 'reg_lambda': 0.03188419400042777}, 'Best Learner': 'lgbm', 'Best Hyper-parameters': {'n_estimators': 186, 'num_leaves': 54, 'min_child_samples': 4, 'learning_rate': 0.23158746785935408, 'log_max_bin': 8, 'colsample_bytree': 0.7577916169254649, 'reg_alpha': 0.008948061409181867, 'reg_lambda': 0.03188419400042777}}
{'Current Learner': 'lgbm', 'Current Sample': 4221, 'Current Hyper-parameters': {'n_estimators': 119, 'num_leaves': 27, 'min_child_samples': 3, 'learning_rate': 0.17571044190405946, 'log_max_bin': 8, 'colsample_bytree': 0.9185336079509137, 'reg_alpha': 0.12438679004009143, 'reg_lambda': 0.01900130015865831}, 'Best Learner': 'lgbm', 'Best Hyper-parameters': {'n_estimators': 119, 'num_leaves': 27, 'min_child_samples': 3, 'learning_rate': 0.17571044190405946, 'log_max_bin': 8, 'colsample_bytree': 0.9185336079509137, 'reg_alpha': 0.12438679004009143, 'reg_lambda': 0.01900130015865831}}
plt.title('Learning Curve')
plt.xlabel('Wall Clock Time (s)')
plt.ylabel('Validation r2')
# Each trial's validation r2 (losses are 1 - r2, so invert them back).
plt.scatter(time_history, 1 - np.array(valid_loss_history))
# Best-so-far r2 as a step function of elapsed search time.
plt.step(time_history, 1 - np.array(best_valid_loss_history), where='post')
plt.show()
# Boosting iteration count of the best model.
automl.best_iteration
# Test-set r2 of the FLAML model, for comparison with default LightGBM below.
print('flaml r2', '=', 1 - sklearn_metric_loss_score('r2', y_pred, y_test))
flaml r2 = 0.8911677265464114
# Baseline comparison: train LightGBM directly (no FLAML tuning).
import lightgbm as lgb
print(f'lightgbm version = {lgb.__version__}')
lightgbm version = 3.3.0
# Baseline LightGBM regressor: library defaults everywhere except a
# heavier L1 penalty (reg_alpha=5.0).
params = dict(
    boosting_type='gbdt',
    class_weight=None,
    colsample_bytree=1.0,
    importance_type='split',
    learning_rate=0.1,
    max_depth=-1,
    min_child_samples=20,
    min_child_weight=0.001,
    min_split_gain=0.0,
    n_estimators=100,
    n_jobs=-1,
    num_leaves=31,
    objective=None,
    random_state=None,
    reg_alpha=5.0,
    reg_lambda=0.0,
    silent=True,
    subsample=1.0,
    subsample_for_bin=200000,
    subsample_freq=0,
)
lgbm = lgb.LGBMRegressor(**params)

# Score on the test split every 10 rounds; stop early after 100 rounds
# without improvement on the eval set.
fit_params = dict(
    early_stopping_rounds=100,
    eval_set=[(X_test, y_test)],
    verbose=10,
)
lgbm.fit(X_train, y_train, **fit_params) # with early_stopping and lgb.plot_metric
[10] valid_0's l2: 2.29424e+07
[20] valid_0's l2: 1.34804e+07
[30] valid_0's l2: 1.12093e+07
[40] valid_0's l2: 1.05607e+07
[50] valid_0's l2: 1.0238e+07
[60] valid_0's l2: 1.013e+07
[70] valid_0's l2: 1.00044e+07
[80] valid_0's l2: 9.9589e+06
[90] valid_0's l2: 9.93005e+06
[100] valid_0's l2: 9.88753e+06
# Test-set predictions from the untuned LightGBM baseline.
y_pred = lgbm.predict(X_test)
from flaml.ml import sklearn_metric_loss_score
# r2 of default LightGBM — compare against the tuned FLAML r2 printed above.
print('default lgbm r2', '=', 1 - sklearn_metric_loss_score('r2', y_pred, y_test))
default lgbm r2 = 0.8779801320818768
# Plot the recorded eval metric: after roughly 20 boosting rounds the l2
# on the eval set starts to plateau.
lgb.plot_metric(lgbm)
# R^2 on the training split (optimistic relative to the test score above).
lgbm.score(X_train, y_train)
lgbm.predict(X_test[:5])
# Cross-validate the same estimator on 5 random 80/20 shuffles of the full
# data. FIX: cross_val_score was used below without ever being imported
# (NameError); import it alongside ShuffleSplit.
from sklearn.model_selection import ShuffleSplit, cross_val_score
cv = ShuffleSplit(n_splits=5, test_size=0.2)
scores = cross_val_score(lgbm, X, y, cv=cv)
print(scores)
print(f'mean scores = {scores.mean()}, sd={scores.std():.4f}')
[0.84913259 0.90094793 0.8444652 0.92274986 0.92563647]
mean scores = 0.8885864095521896, sd=0.0352
# NOTE(review): `pip install graphviz` installs only the Python bindings;
# the Graphviz system binaries must also be available and the session
# restarted, otherwise lgb.create_tree_digraph raises
# "ImportError: You must install graphviz..." (seen in the output below).
!pip install -U graphviz
# Node annotations to render inside each tree diagram.
show_info=['split_gain', 'internal_value', 'data_percentage', 'leaf_count']
orientation='vertical'
# Number of boosted trees in the fitted model; upper bound for the picker.
max_num_trees=lgbm.__dict__['_Booster'].num_trees()
# Widget: which tree of the ensemble to draw (0-based).
w_tree_index=widgets.BoundedIntText(
value=0,
min=0,
max=max_num_trees-1,
step=1,
continuous_update=True,
)
# Widget: maximum tree depth shown in the diagram.
w_max_depth=widgets.BoundedIntText(
value=3,
min=2,
max=10,
step=1,
continuous_update=True,
)
# Widget: number of estimators; only re-bounds w_tree_index in plot_tree.
w_n_estimators=widgets.BoundedIntText(
value=3,
min=1,
max=100,
step=1,
continuous_update=True,
)
@interact
def plot_tree(tree_index=w_tree_index, orientation=['vertical', 'horizontal'], max_depth=w_max_depth, n_estimators=w_n_estimators, save_tree_img=[False, True]):
    """Interactively render one tree of the fitted LightGBM ensemble.

    All parameters are supplied by ipywidgets via @interact:
        tree_index: which boosted tree to draw (0-based).
        orientation: digraph layout, 'vertical' or 'horizontal'.
        max_depth, n_estimators: widget values only — they do NOT retrain
            the model; n_estimators merely re-bounds the tree picker below.
        save_tree_img: if True, also render the diagram to tree<index>.png.

    Returns the graphviz Digraph so the notebook displays it inline.
    """
    # FIX: removed an unused `params` dict that was built here from
    # n_estimators/max_depth but never applied anywhere, misleadingly
    # suggesting the model was being re-configured per interaction.
    case_index = 1
    # Show one sample case and its prediction alongside the tree.
    print(X_test.iloc[[case_index]])
    print(f'predicted value = {lgbm.predict(X_test.iloc[[case_index]])}')
    # Keep the tree picker's upper bound in sync with the widget value.
    w_tree_index.max = w_n_estimators.value - 1
    g = lgb.create_tree_digraph(lgbm, orientation=orientation, tree_index=tree_index, show_info=show_info, precision=4)
    if save_tree_img:
        g.format = 'png'
        g.render(f'tree{tree_index}', view=False, cleanup=True)
    return g
Unsupported output type: clearOutput
year transmission fuelType engineSize mfg mileage_km kml
985 2016.0 Manual Diesel 2.0 bmw 64240.02 25.04
predicted value = [15583.23850216]
Execution error
ImportError: You must install graphviz and restart your session to plot tree.
# SHAP: per-feature additive explanations of the model's predictions.
!pip install shap
import shap
print(f'shap version {shap.__version__}')
# load JS visualization code to notebook
shap.initjs()
Collecting shap
Downloading shap-0.40.0.tar.gz (371 kB)
|████████████████████████████████| 371 kB 32.3 MB/s
Installing build dependencies ... done
Getting requirements to build wheel ... done
Preparing wheel metadata ... done
Requirement already satisfied: numpy in /shared-libs/python3.7/py/lib/python3.7/site-packages (from shap) (1.19.5)
Requirement already satisfied: pandas in /shared-libs/python3.7/py/lib/python3.7/site-packages (from shap) (1.2.5)
Collecting cloudpickle
Downloading cloudpickle-2.0.0-py3-none-any.whl (25 kB)
Collecting numba
Downloading numba-0.54.1-cp37-cp37m-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (3.3 MB)
|████████████████████████████████| 3.3 MB 36.9 MB/s
Requirement already satisfied: scipy in /shared-libs/python3.7/py/lib/python3.7/site-packages (from shap) (1.7.1)
Requirement already satisfied: scikit-learn in /shared-libs/python3.7/py/lib/python3.7/site-packages (from shap) (1.0)
Requirement already satisfied: packaging>20.9 in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from shap) (21.0)
Collecting slicer==0.0.7
Downloading slicer-0.0.7-py3-none-any.whl (14 kB)
Requirement already satisfied: tqdm>4.25.0 in /shared-libs/python3.7/py/lib/python3.7/site-packages (from shap) (4.62.3)
Requirement already satisfied: pyparsing>=2.0.2 in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from packaging>20.9->shap) (2.4.7)
Requirement already satisfied: setuptools in /root/venv/lib/python3.7/site-packages (from numba->shap) (58.1.0)
Collecting llvmlite<0.38,>=0.37.0rc1
Downloading llvmlite-0.37.0-cp37-cp37m-manylinux2014_x86_64.whl (26.3 MB)
|████████████████████████████████| 26.3 MB 39.2 MB/s
Requirement already satisfied: pytz>=2017.3 in /shared-libs/python3.7/py/lib/python3.7/site-packages (from pandas->shap) (2021.3)
Requirement already satisfied: python-dateutil>=2.7.3 in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from pandas->shap) (2.8.2)
Requirement already satisfied: six>=1.5 in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from python-dateutil>=2.7.3->pandas->shap) (1.16.0)
Requirement already satisfied: joblib>=0.11 in /shared-libs/python3.7/py/lib/python3.7/site-packages (from scikit-learn->shap) (1.1.0)
Requirement already satisfied: threadpoolctl>=2.0.0 in /shared-libs/python3.7/py/lib/python3.7/site-packages (from scikit-learn->shap) (3.0.0)
Building wheels for collected packages: shap
Building wheel for shap (PEP 517) ... done
Created wheel for shap: filename=shap-0.40.0-cp37-cp37m-linux_x86_64.whl size=433707 sha256=f83c3312b4eff4aa8ac37a62b2f563695c210b6ff34047da2439d38639cb7dad
Stored in directory: /root/.cache/pip/wheels/ec/35/84/e304841ac4b910bc95fe9a6e5302eb2507b4333728851dcbfb
Successfully built shap
Installing collected packages: llvmlite, slicer, numba, cloudpickle, shap
Successfully installed cloudpickle-2.0.0 llvmlite-0.37.0 numba-0.54.1 shap-0.40.0 slicer-0.0.7
WARNING: You are using pip version 21.2.4; however, version 21.3.1 is available.
You should consider upgrading via the '/root/venv/bin/python -m pip install --upgrade pip' command.
shap version 0.40.0
def case_detail(case_data):
    """Turn the payload of a shap.force_plot() object into a DataFrame.

    Parameters:
        case_data: object returned by shap.force_plot(); its
            ``.data['features']`` maps column positions to per-feature info
            and ``.data['featureNames']`` maps positions to readable names.

    Returns:
        A DataFrame of the per-feature info whose columns are renamed from
        positional keys to the human-readable feature names.
    """
    features = case_data.data['features']
    de = pd.DataFrame(features)
    # Rename positional columns to feature names; comprehension replaces the
    # original manual append loop (same order: the DataFrame's columns follow
    # the dict's key insertion order).
    de.columns = [case_data.data['featureNames'][i] for i in features.keys()]
    return de
def individual_case_plot(explainer, X, case_index, verbose=False):
    """Render a SHAP force plot for one row of X.

    Parameters:
        explainer: a fitted shap.TreeExplainer.
        X: feature DataFrame to take the case from.
        case_index: positional index of the row to explain.
        verbose: if True, pretty-print the plot object's internals.

    Returns the force-plot object so the notebook displays it inline.

    >>> individual_case_plot(explainer, X_train, 1)
    """
    # FIX: pprint was called below but never imported anywhere in the
    # notebook, so verbose=True raised NameError.
    from pprint import pprint

    shap_values = explainer.shap_values(X.iloc[[case_index]])
    g = shap.force_plot(explainer.expected_value, shap_values=shap_values, features=X.iloc[case_index, :])
    if verbose:
        pprint(g.__dict__)
    return g
# explain the model's predictions using SHAP
explainer = shap.TreeExplainer(lgbm)
# One row per sample, one column per feature: each entry is that feature's
# additive contribution to the model output for that sample.
shap_values = explainer.shap_values(X)
X
shap_values[:3]
# NOTE(review): assumes `feature_cols` (defined in an earlier cell) matches
# the column order of X — confirm against where X was built.
dshap=pd.DataFrame(shap_values, columns=feature_cols)
dshap
# mean(abs(SHAP value)): average impact on model output magnitude
feature_imp = np.abs(dshap).mean().sort_values(ascending=False)
pd.DataFrame(feature_imp).style.background_gradient(cmap='Blues')
# Bar chart of mean |SHAP| per feature (global importance).
shap.summary_plot(dshap, X, plot_type="bar")
# summarize the effects of all the features
shap.summary_plot(shap_values, X)