Prerequisites for Understanding This Article
Recap of the IQR Methodology
Now, how does the Isolation Forest differ from this method? 🌲
Implementing the Isolation Forest for Anomaly Detection
Step One - Instantiate your Isolation Forest
import pandas as pd
import numpy as np
# `mpl` is referenced below for rcParams; without this import that line
# raises NameError.
import matplotlib as mpl
import matplotlib.pyplot as plt
import warnings
# NOTE: this silences every warning category for the rest of the session,
# including pandas/scikit-learn deprecation and chained-assignment warnings.
warnings.filterwarnings("ignore")
import seaborn as sns
sns.set_style("whitegrid")
# Default figure size (width, height) applied to every figure created later.
mpl.rcParams['figure.figsize'] = (20,5)
from sklearn.ensemble import IsolationForest
# Load the dataset; the "Date (DD/MM/YYYY)" column becomes the index and is
# parsed as day-first dates.
df = pd.read_csv(
    r'/work/Time_Series_Chemical_Machinery_Dataset.csv',
    index_col="Date (DD/MM/YYYY)",
    parse_dates=True,
    dayfirst=True,
)

# Take an explicit copy: new columns are added to X below, and writing into a
# selection of `df` would otherwise trigger pandas' chained-assignment warning.
X = df[['Motor_Power']].copy()

# scikit-learn estimators expect a 2-D (n_samples, n_features) array, hence
# the reshape of the single feature into one column.
motor_power = X['Motor_Power'].values.reshape(-1, 1)

# A fixed seed makes the randomly built trees, and therefore the scores and
# labels derived from them, reproducible between runs.
model_isoforest = IsolationForest(random_state=42)
model_isoforest.fit(motor_power)

# score_samples: the lower the score, the more abnormal the sample.
# Computed once and reused for the column instead of scoring the data twice.
scores = model_isoforest.score_samples(motor_power)
X['anomaly_scores'] = scores

# predict labels each sample as +1 (inlier) or -1 (outlier).
X['anomaly_classification'] = model_isoforest.predict(motor_power)
Step Two - Fit the Isolation Forest and Generate the Anomaly Scores
# Fit the forest on the single-feature array and score every sample.
model_isoforest.fit(motor_power)

# score_samples: the lower the score, the more abnormal the sample.
# The result is computed once and reused for the DataFrame column.
scores = model_isoforest.score_samples(motor_power)
X['anomaly_scores'] = scores

# predict labels each sample as +1 (inlier) or -1 (outlier); `motor_power`
# already holds X['Motor_Power'] reshaped to one column, so it is reused.
X['anomaly_classification'] = model_isoforest.predict(motor_power)
# Bare expression: a no-op in a plain script; presumably left over from a
# notebook cell, where it would render the scored frame.
X

# Plot the raw signal together with the model's +1/-1 classification.
classification_view = X[['Motor_Power', 'anomaly_classification']]
default_ax = classification_view.plot()
default_ax.set_title("Isolation Forest at 0.5 Anomaly Score Threshold")
plt.show()
from adtk.detector import InterQuartileRangeAD
from adtk.visualization import plot
# Flag a sample (1) when its score falls below the stricter cutoff, else 0.
score_cutoff = -0.75
below_cutoff = X['anomaly_scores'] < score_cutoff
X['anomaly_classification_cutoff'] = np.where(below_cutoff, 1, 0)

# Plot the raw signal together with the cutoff-based flag; the legend is
# anchored outside the axes at the upper left.
cutoff_ax = X[['Motor_Power', 'anomaly_classification_cutoff']].plot()
cutoff_ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
cutoff_ax.set_title("Isolation Forest at 0.75 Anomaly Score Threshold")
# IQR methodology using the ADTK library, for comparison with the forest.
motor_power_series = df['Motor_Power']
iqr_ad = InterQuartileRangeAD(c=1.5)
anomalies = iqr_ad.fit_detect(motor_power_series)

# Draw the series and mark the detected anomalies on it.
plot(
    motor_power_series,
    anomaly=anomalies,
    ts_linewidth=3,
    ts_markersize=3,
    anomaly_markersize=5,
    anomaly_color='deeppink',
    anomaly_tag="marker",
)

# Overlay the 'Motor Trip Failure' column on the current axes.
df['Motor Trip Failure'].plot(color='black', marker="D", linewidth=1.5)
plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
plt.title("IQR Anomaly Detection")
plt.show()