# Keras Tuner
import os
import numpy as np
import pandas as pd
import seaborn as sns
import tempfile
import tensorflow as tf
import urllib.request
from keras_tuner import RandomSearch
from tensorflow import keras
from tensorflow.keras import layers, losses, metrics, optimizers
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, LabelEncoder, StandardScaler
# Preparing the data
# Download the Palmer Penguins dataset into a throwaway directory and load it.
DATA_DIRECTORY = tempfile.mkdtemp(prefix="keras-tuner-data")
DATA_FILEPATH = os.path.join(DATA_DIRECTORY, "penguins.csv")

_DATASET_URL = (
    "https://storage.googleapis.com/download.tensorflow.org/data/palmer_penguins/penguins_size.csv"
)
urllib.request.urlretrieve(_DATASET_URL, DATA_FILEPATH)

df = pd.read_csv(DATA_FILEPATH)
# Numeric features: mean-impute missing values, then standardize.
numerical_columns = df.select_dtypes(include=["int64", "float64"]).columns.tolist()
numerical_preprocessor = Pipeline(
    steps=[
        ("imputer", SimpleImputer(strategy="mean")),
        ("scaler", StandardScaler()),
    ]
)
# Categorical features: mode-impute, then one-hot encode; categories unseen
# during fit are ignored (encoded as all zeros) rather than raising.
categorical_preprocessor = Pipeline(
    steps=[
        ("imputer", SimpleImputer(strategy="most_frequent")),
        ("onehot", OneHotEncoder(handle_unknown="ignore")),
    ]
)
# Only the "island" column is one-hot encoded; all numeric columns are scaled.
preprocessor = ColumnTransformer(
    transformers=[
        ("numerical_preprocessor", numerical_preprocessor, numerical_columns),
        ("categorical_preprocessor", categorical_preprocessor, ["island"]),
    ]
)
# Predict the species from every remaining feature except "sex".
y = df["species"]
X = df.drop(columns=["species", "sex"])

# Hold out 20% for evaluation; a fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.20, random_state=42
)

# Fit the preprocessing on the training split only, to avoid test-set leakage.
X_train = preprocessor.fit_transform(X_train)
X_test = preprocessor.transform(X_test)

# Encode the species names as integer class ids.
label_encoder = LabelEncoder()
y_train = label_encoder.fit_transform(y_train)
y_test = label_encoder.transform(y_test)
# Set up the model
def _model(hp):
    """Build a tunable penguin-species classifier for Keras Tuner.

    Parameters
    ----------
    hp : keras_tuner.HyperParameters
        Search-space handle used to sample the two hidden-layer widths
        and the learning rate.

    Returns
    -------
    A compiled ``keras.Sequential`` model with a 3-way softmax output.
    """
    model = keras.Sequential([
        layers.Dense(
            hp.Int("dense_1_units", min_value=4, max_value=12, step=4, default=8),
            # Bug fix: the first hidden layer had no activation, making it a
            # purely linear transform; give it ReLU like the second layer.
            activation="relu",
            input_shape=(X_train.shape[1],)
        ),
        layers.Dense(
            hp.Int("dense_2_units", min_value=4, max_value=12, step=4, default=8),
            activation="relu"
        ),
        # One output unit per penguin species.
        layers.Dense(3, activation="softmax"),
    ])
    model.compile(
        optimizer=optimizers.Adam(
            # Tune the learning rate over one order of magnitude.
            hp.Choice("learning_rate", values=[1e-2, 1e-3])
        ),
        # Labels are one-hot encoded via to_categorical at search time, so use
        # the dense (non-sparse) categorical cross-entropy.
        loss="categorical_crossentropy",
        metrics=["accuracy"]
    )
    return model
# Searching for the best values
# Randomly sample 10 hyperparameter configurations, keeping whichever scores
# the best validation accuracy.
tuner = RandomSearch(
    hypermodel=_model,
    objective="val_accuracy",
    max_trials=10,
    overwrite=True,
    directory="keras-tuner",
    project_name="keras-tuner-example",
)
tuner.search_space_summary()

# One-hot encode the integer labels to match the categorical cross-entropy loss.
y_train_onehot = to_categorical(y_train)
y_test_onehot = to_categorical(y_test)
tuner.search(
    X_train,
    y_train_onehot,
    epochs=5,
    validation_data=(X_test, y_test_onehot),
)
tuner.results_summary()
# Using the best model
# Retrieve the best model found during the search and evaluate it.
model = tuner.get_best_models(num_models=1)[0]

# predict() returns per-class softmax probabilities; argmax gives class ids.
y_pred = np.argmax(model.predict(X_test), axis=-1)

# Bug fix: the original ended with a bare `accuracy` expression, which is a
# no-op when run as a script (it only displays in a notebook). Compute the
# percentage with np.mean and print it explicitly.
accuracy = np.mean(y_pred == y_test) * 100
print(f"Test accuracy: {accuracy:.2f}%")