import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import datetime as dt
%pip install xlrd  # xlrd lets pandas read the legacy .xls workbook
m_wires = pd.read_excel('/work/BAB262-XLS-ENG.xls', sheet_name = 'Wires By Month modified')
m_wires
m_wires.info()
# extract the calendar month (1-12) from the datetime 'Month' column
m_wires['month in year'] = m_wires['Month'].dt.month
# reshape from wide (one column per year) to long format: one row per (year, month)
monthly = pd.melt(m_wires, id_vars = 'month in year', value_vars = ['2007','2008','2009','2010','2011','2012','2013'])
monthly.columns = ['month','year','wires']
monthly['ym'] = monthly['year'].map(str) + "/" + monthly['month'].map(str)
monthly['ym'] = pd.to_datetime(monthly['ym'],format = '%Y/%m')
monthly.head()
monthly['year'] = monthly['year'].astype(int)
# numeric time axis: the year plus the fraction of the year covered by the month
monthly['timeindex'] = monthly['year'] + monthly['month']/12
monthly['timeindex'].head()
monthly = monthly.dropna()
m, b = np.polyfit(monthly['timeindex'], monthly['wires'], 1)
m, b
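# Optional check (a quick sketch, not part of the original analysis): how much of the
# variance the straight-line trend explains, using the slope/intercept from np.polyfit.
trend = m * monthly['timeindex'] + b
ss_res = ((monthly['wires'] - trend) ** 2).sum()
ss_tot = ((monthly['wires'] - monthly['wires'].mean()) ** 2).sum()
print('R^2 of the linear trend:', 1 - ss_res / ss_tot)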
plt.figure(figsize = (16,9))
plt.scatter(monthly['timeindex'],monthly['wires'])
m, b = np.polyfit(monthly['timeindex'], monthly['wires'], 1)
plt.plot(monthly['timeindex'], m*monthly['timeindex'] + b)
plt.xlabel('year')
plt.ylabel('Amount of wires')
plt.show()
# drop one outlying observation (index label 11) before refitting the trend
monthly = monthly.drop(index = 11)
monthly.head()
%pip install statsmodels
import statsmodels.api as sm
from statsmodels import regression
X = monthly['timeindex']
y = monthly['wires']
# statsmodels OLS expects the endogenous variable first: sm.OLS(y, X)
X = sm.add_constant(X)
model = sm.OLS(y.astype(float), X.astype(float), missing='drop').fit()
predictions = model.predict(X.astype(float)) # fitted values from the model
# Print out the statistics
print(model.summary())
model.params
b1, m1 = model.params # intercept (const) and slope (timeindex)
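# Optional sketch: the OLS fit also gives a 95% confidence interval for the slope,
# which is a quick way to judge how precisely the trend is estimated.
print(model.conf_int().loc['timeindex'])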
plt.figure(figsize = (16,9))
plt.scatter(monthly['timeindex'], monthly['wires'])
plt.plot(monthly['timeindex'], m1*monthly['timeindex'] + b1)
plt.xlabel('year')
plt.ylabel('Amount of wires')
plt.show()
import seaborn as sns
sns.boxplot(monthly['wires'])
sns.stripplot(monthly['wires'])
plt.show()
np.percentile(monthly['wires'],1)
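# Optional sketch: an alternative way to flag unusual months is the 1.5*IQR rule
# that the box plot above uses.
q1, q3 = monthly['wires'].quantile([0.25, 0.75])
iqr = q3 - q1
print(monthly[(monthly['wires'] < q1 - 1.5 * iqr) | (monthly['wires'] > q3 + 1.5 * iqr)])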
nov = pd.read_excel('BAB262-XLS-ENG.xls', sheet_name = 'November 2010 Wires')
nov.head()
nov.info()
nov['date_n_time'] = pd.to_datetime(nov['date_n_time'],errors='coerce')
nov.info()
nov = nov.sort_values(by = 'date_n_time', ascending = True)
nov = nov.set_index('date_n_time')
nov.head()
nov['Transaction Amount'] = nov['Transaction Amount'].astype('float')
nov['Transaction Amount'].isnull().sum()
nov = nov.dropna()
nov.info()
plt.figure(figsize=(20,10))
nov['Transaction Amount'].plot()
plt.ylabel("transaction amount in USD")
plt.show()
nov['Transaction Amount'].describe()
nov['Transaction Amount'].sum()
# trim extremes: drop the 6 smallest and the 3 largest transactions
nov1 = nov.reset_index()
nov1 = nov1.sort_values(by = 'Transaction Amount', ascending = True)
nov1 = nov1.reset_index(drop = True)
nov1 = nov1.loc[6: ,:]
nov1 = nov1.sort_values(by = 'Transaction Amount', ascending = False)
nov1 = nov1.reset_index(drop = True)
nov1 = nov1.loc[3: ,:]
nov1.tail()
nov.sort_values(by = 'Transaction Amount', ascending = False).head(20)
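# Optional sketch: the same trimming (drop the 6 smallest and the 3 largest transactions)
# can be done more directly with nsmallest/nlargest; this assumes each row has a unique
# timestamp index, otherwise drop by position instead.
extremes = pd.concat([nov['Transaction Amount'].nsmallest(6),
                      nov['Transaction Amount'].nlargest(3)])
nov_trimmed = nov.drop(extremes.index)
print(len(nov), len(nov_trimmed))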
nov2 = nov1.set_index('date_n_time')
nov2 = nov2.resample("T").sum() # aggregate to per-minute totals ("T" = minute frequency)
nov2.head()
nov3 = nov2.reset_index()
nov3 = nov3.sort_values(by = 'date_n_time' , ascending = True)
nov3['Transaction Amount'].sum()
len(nov['Trans ID'].unique())
from statsmodels.tsa.stattools import adfuller
def test_stationarity(timeseries):
    # Determine rolling statistics
    rolmean = timeseries.rolling(window = 2).mean()
    rolstd = timeseries.rolling(window = 2).std()
    # Plot rolling statistics
    orig = plt.plot(timeseries, color='blue', label='Original')
    mean = plt.plot(rolmean, color='red', label='Rolling Mean')
    std = plt.plot(rolstd, color='black', label='Rolling Std')
    plt.legend(loc='best')
    plt.title('Rolling Mean & Standard Deviation')
    plt.show(block=False)
    # Perform Dickey-Fuller test
    print('Results of Dickey-Fuller Test:')
    dftest = adfuller(timeseries.values, autolag='AIC')
    dfoutput = pd.Series(dftest[0:4], index=['Test Statistic','p-value','#Lags Used','Number of Observations Used'])
    for key, value in dftest[4].items():
        dfoutput['Critical Value (%s)' % key] = value
    print(dfoutput)
test_stationarity(nov2['Transaction Amount'])
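# Optional follow-up (sketch): if the ADF test above does not reject non-stationarity,
# first-order differencing of the per-minute totals is a common next step.
diffed = nov2['Transaction Amount'].diff().dropna()
test_stationarity(diffed)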
# Import the statsmodels module for regression and the adfuller function
import statsmodels.api as sm
from statsmodels.tsa.stattools import adfuller
def cointegration(dep, indep):
    # regress the dependent series on the independent one
    # (statsmodels OLS expects the endogenous variable first: sm.OLS(y, X))
    X = sm.add_constant(indep)
    y = dep
    model = sm.OLS(y.astype(float), X.astype(float), missing='drop').fit()
    predictions = model.predict(X.astype(float)) # fitted values from the model
    # Print out the statistics
    print(model.summary())
    b = model.params.iloc[1]
    # Run the ADF test on the regression residual (dep - b*indep)
    test_stationarity(dep - b*indep)
# both series come from the per-minute frame nov3; the timestamps are converted to
# integer nanoseconds so they can enter the regression as numbers
cointegration(nov3['date_n_time'].astype('int64'), nov3['Transaction Amount'])
%pip install pystan==2.19.1.1  # only needed for older prophet builds; recent prophet uses cmdstanpy
%pip install prophet
from prophet import Prophet
from prophet.plot import plot_plotly
nov = nov.reset_index()
# rebuild the per-minute frame and assemble the Prophet training dataframe (ds, y)
nov2 = nov1.set_index('date_n_time')
nov2 = nov2.resample("T").sum()
nov2.head()
nov3 = nov2.reset_index()
nov3 = nov3.sort_values(by = 'date_n_time' , ascending = True)
train_dataset= pd.DataFrame()
train_dataset['ds'] = nov3['date_n_time']
train_dataset['y']= nov3['Transaction Amount']
train_dataset= train_dataset.dropna()
train_dataset.head(2)
train_dataset['y'].hist(bins = 100)
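# Optional sketch: if the histogram above looks heavy-tailed, a log1p transform of the
# target is sometimes applied before fitting Prophet (purely illustrative, not used below).
train_log = train_dataset.copy()
train_log['y'] = np.log1p(train_log['y'])
train_log['y'].hist(bins = 100)
plt.show()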
# create a Prophet instance with default values to fit the dataset
prophet_basic = Prophet()
prophet_basic.fit(train_dataset)
# create a dataframe with ds (i.e., datetime stamp) holding the time series of dates we need for prediction
# periods * freq sets how far to extend into the future (here 1000 minutes, since freq = 'T')
future = prophet_basic.make_future_dataframe(periods=1000, freq = 'T')
future.tail()
# forecast per-minute wire transaction amounts
forecast = prophet_basic.predict(future)
# plot the predicted amounts
fig1 = prophet_basic.plot(forecast)
# plot the trend and seasonality
fig1 = prophet_basic.plot_components(forecast)
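# Optional sketch: Prophet ships time-series cross-validation helpers that can be used to
# sanity-check the fit; the initial/period/horizon windows below are illustrative values
# chosen for roughly one month of per-minute data.
from prophet.diagnostics import cross_validation, performance_metrics
df_cv = cross_validation(prophet_basic, initial='14 days', period='3 days', horizon='1 day')
print(performance_metrics(df_cv).head())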
# identify changepoints (i.e., datetime points where the time series experiences abrupt changes)
from prophet.plot import add_changepoints_to_plot
fig = prophet_basic.plot(forecast)
a = add_changepoints_to_plot(fig.gca(), prophet_basic, forecast)
prophet_basic.changepoints
# adjust trend sensitivity with the "changepoint_prior_scale" parameter
# default is 0.05: a lower value gives a less flexible trend, a higher value a more flexible one
pro_change= Prophet(changepoint_prior_scale=0.15)
forecast = pro_change.fit(train_dataset).predict(future)
fig= pro_change.plot(forecast)
a = add_changepoints_to_plot(fig.gca(), pro_change, forecast)
from sklearn import tree
from sklearn.model_selection import train_test_split
# classification target: the raw timestamps are almost all unique, so the day of the
# month is used as the class label here; the feature is the per-minute amount
y = nov3['date_n_time'].dt.day
x = nov3[['Transaction Amount']]
Xtrain, Xtest, Ytrain, Ytest = train_test_split(x, y, test_size = 0.3)
Xtrain.index = range(Xtrain.shape[0])
clf = tree.DecisionTreeClassifier(criterion = "gini",
                                  random_state = 0,
                                  max_depth = 4,
                                  #min_samples_split =
                                  #min_samples_leaf =
                                  )
clf = clf.fit(Xtrain, Ytrain)
score = clf.score(Xtest, Ytest)
score
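# Optional sketch: visualize the top of the fitted tree to see which amount thresholds
# drive the splits (uses sklearn's plot_tree; clf is the classifier fitted above).
plt.figure(figsize = (16,9))
tree.plot_tree(clf, feature_names=['Transaction Amount'], filled=True, max_depth=2)
plt.show()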