import pandas as pd
from sklearn.preprocessing import StandardScaler
import seaborn as sns
import numpy as np
from tensorflow.keras import models, layers, regularizers
# --- Data preparation -------------------------------------------------------
# Load the diamonds table, one-hot encode the categorical columns, drop target
# outliers, then build a shuffled train/test split with features standardised
# using statistics from the training rows only.
df = pd.read_csv("/content/sample_data/diamonds.csv")

# dtype=float keeps the indicator columns numeric so the scaled values written
# back below do not have to change the column dtype.
df = pd.get_dummies(df, columns=["cut", "clarity", "color"], dtype=float)

# Features are every column except the CSV's index column and the target.
# To restrict to the raw numeric features, use instead:
#   X = ["depth", "table", "price", "x", "y", "z"]
X = df.columns.drop(["Unnamed: 0", "carat"])
y = "carat"

# Target distribution before and after removing values more than three
# standard deviations above the mean. "carat" is not in X, so this filter does
# not depend on feature scaling.
# NOTE(review): sns.distplot is deprecated in recent seaborn releases
# (histplot/displot are the replacements) -- confirm the installed version.
sns.set(rc={"figure.figsize": (15, 9)})
sns.distplot(df["carat"])
df = df[df[y] < np.mean(df[y]) + np.std(df[y]) * 3]
sns.distplot(df["carat"])

# Shuffle before splitting: the split below is positional, and Keras'
# validation_split also takes the trailing rows, so any ordering present in
# the CSV would otherwise bias the train/validation/test partitions.
# A fixed seed keeps the split reproducible.
df = df.sample(frac=1, random_state=42).reset_index(drop=True)

TRAIN_SIZE = 45000
df_train = df[:TRAIN_SIZE].copy()  # copies, so the writes below do not
df_test = df[TRAIN_SIZE:].copy()   # target a slice of df

# Fit the scaler on the training rows only, then apply the same transform to
# both partitions; fitting on the full frame would leak test-set statistics
# into the training features. Note that df itself is left unscaled.
scaler = StandardScaler()
scaler = scaler.fit(df_train[X])
df_train[X] = scaler.transform(df_train[X])
df_test[X] = scaler.transform(df_test[X])
# --- Model definition -------------------------------------------------------
# Fully connected regressor: two 32-unit ReLU hidden layers with L2 kernel
# regularisation, followed by a single linear output unit.
# The input width is derived from the feature list so the model stays in sync
# with X if the set of feature columns changes.
model = models.Sequential()
model.add(
    layers.Dense(
        32,
        activation="relu",
        input_shape=(len(X),),
        kernel_regularizer=regularizers.l2(0.001),
    )
)
model.add(
    layers.Dense(
        32,
        activation="relu",
        kernel_regularizer=regularizers.l2(0.001),
    )
)
model.add(layers.Dense(1))

# Optimise mean squared error; also track mean absolute error.
model.compile(optimizer="rmsprop", loss="mse", metrics=["mae"])
# --- Training, validation curves, and held-out evaluation -------------------
EPOCHS = 50

# 30% of the training rows are held out by Keras for validation.
history = model.fit(
    df_train[X],
    df_train[y],
    batch_size=512,
    epochs=EPOCHS,
    validation_split=0.3,
)

# Derive the x-axis from the recorded history rather than repeating the epoch
# count, so the plots stay valid if EPOCHS changes or training stops early.
val_loss = history.history["val_loss"]
val_mae = history.history["val_mae"]
epoch_axis = range(len(val_loss))
sns.lineplot(x=epoch_axis, y=val_loss)
sns.lineplot(x=epoch_axis, y=val_mae)

# evaluate() returns the loss followed by the compiled metrics; print them so
# the result is visible when this runs as a script rather than a notebook cell.
test_loss, test_mae = model.evaluate(df_test[X], df_test[y])
print(f"test loss: {test_loss:.4f}, test MAE: {test_mae:.4f}")