select * from transaction_agg -- database table loaded with challenge data.
where day > '2025-01-03' -- ignoring new year holiday influence
Run to view results
select day, sum(quantity_transactions) as value from df_source_raw group by day order by day asc
Run to view results
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.seasonal import seasonal_decompose
df = df_source.copy()
df['day'] = pd.to_datetime(df['day'])
df.set_index('day', inplace=True)
df.sort_index(inplace=True)
seasonal_period = 7
decomp = seasonal_decompose(df['value'], model='additive', period=seasonal_period)
fig, (ax1, ax2, ax3, ax4) = plt.subplots(4, 1, figsize=(14, 10), sharex=True)
decomp.observed.plot(ax=ax1, legend=False, title="Observed")
decomp.trend.plot(ax=ax2, legend=False, title="Trend")
decomp.seasonal.plot(ax=ax3, legend=False, title="Seasonality")
decomp.resid.plot(ax=ax4, legend=False, title="Residue")
plt.tight_layout()
plt.show()
Run to view results
df['residue'] = decomp.resid
residue_mean = df['residue'].mean()
residue_std_dev = df['residue'].std()
upper_bound = residue_mean + 3 * residue_std_dev
lower_bound = residue_mean - 3 * residue_std_dev
df['anomaly'] = (df['residue'] > upper_bound) | (df['residue'] < lower_bound)
plt.figure(figsize=(14, 6))
plt.plot(df.index, df['value'], label='Value')
plt.plot(decomp.trend, label='Trend', linestyle='--')
anomalias = df[df['anomaly']]
plt.scatter(anomalias.index, anomalias['value'], color='red', label='Anomaly', s=100, zorder=5)
plt.title('Anomaly Detection Post Season Decomposition')
plt.legend()
plt.show()
print("Anomalies:")
df[df['anomaly']]
Run to view results
df['trend'] = decomp.trend
df['seasonality'] = decomp.seasonal
df = df[[
'value',
'trend',
'seasonality',
'residue',
'anomaly'
]]
df
Run to view results
df = df.reset_index()
Run to view results
select * from df
Run to view results
CREATE TABLE IF NOT EXISTS series_decomposition (
day DATE PRIMARY KEY,
value BIGINT,
tendencia DOUBLE PRECISION,
sazonalidade DOUBLE PRECISION,
residuo DOUBLE PRECISION,
anomalia BOOLEAN
);
Run to view results
DROP TABLE series_decomposition;
Run to view results
user = 'postgres.gmbcdfetwzkzutbqlhrk'
password = '12345678'
host = 'aws-0-us-east-1.pooler.supabase.com'
port = '5432'
database = 'postgres'
Run to view results
from sqlalchemy import create_engine
engine = create_engine(f'postgresql+psycopg2://{user}:{password}@{host}:{port}/{database}')
Run to view results
df.to_sql('series_decomposition', engine, if_exists='replace', index=False)
Run to view results
import pandas as pd
import numpy as np
from sklearn.svm import OneClassSVM
import matplotlib.pyplot as plt
Run to view results
df = df_source.copy()
df['day_of_week'] = df['day'].dt.dayofweek
df['rolling_mean'] = df['value'].rolling(window=7, center=True, min_periods=1).mean()
df['rolling_std'] = df['value'].rolling(window=7, center=True, min_periods=1).std()
df['diff'] = df['value'].diff().fillna(0)
X = df[['value', 'day_of_week', 'rolling_mean', 'rolling_std', 'diff']].fillna(0)
model = OneClassSVM(kernel='rbf', gamma='scale', nu=0.05) # nu define a fração de anomalys esperadas
model.fit(X)
df['anomaly'] = model.predict(X)
df['anomaly'] = df['anomaly'].map({1: 0, -1: 1}) # 1 = normal, -1 = anomaly
plt.figure(figsize=(15, 5))
plt.plot(df['day'], df['value'], label='value')
plt.scatter(df.loc[df['anomaly'] == 1, 'day'],
df.loc[df['anomaly'] == 1, 'value'],
color='red', label='anomalys')
plt.title('Detecção de anomalys com SVM')
plt.xlabel('Data')
plt.ylabel('value')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()
Run to view results
df
Run to view results
CREATE TABLE IF NOT EXISTS support_vector_machine (
day DATE PRIMARY KEY,
value BIGINT,
day_of_week INT,
rolling_mean DOUBLE PRECISION,
rolling_std DOUBLE PRECISION,
diff DOUBLE PRECISION,
anomaly BOOLEAN
);
Run to view results
df.to_sql('support_vector_machine', engine, if_exists='replace', index=False)
Run to view results