import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import mean_squared_error, classification_report, confusion_matrix
# Column names containing a space or '%' cannot be used with attribute access
# (e.g. ``frame.percentK``), so they are mapped to identifier-friendly names.
_COLUMN_ALIASES = {'Volume MA': 'Volume_MA', '%K': 'percentK', '%D': 'percentD'}
# Columns removed from every frame before the signals are derived.
_UNUSED_COLUMNS = ['open', 'high', 'low', 'ATR', 'Plot']


def _tidy(frame):
    """Return *frame* with aliased column names and the unused columns removed."""
    return frame.rename(columns=_COLUMN_ALIASES).drop(columns=_UNUSED_COLUMNS)


data_GULF = pd.read_csv('SET_DLY_GULF, 1D.csv')
data_BGRIM = pd.read_csv('SET_DLY_BGRIM, 1D.csv')
data_GPSC = pd.read_csv('SET_DLY_GPSC, 1D.csv')
data_GULF  # notebook-style preview of the raw frame; a no-op when run as a script

data_GULF = _tidy(data_GULF)
data_BGRIM = _tidy(data_BGRIM)
data_GPSC = _tidy(data_GPSC)

# Every signal list below is built by walking the frames in exactly this order.
Stock = [data_GULF, data_BGRIM, data_GPSC]
def _ema_cross_signals(frames, ema_column, lookback=3):
    """Build close-vs-EMA crossover signals for every row of every frame.

    For each frame, in order, one label per row is produced:

    * 'Buy'  - the close is above the EMA now and was below it on each of the
      previous ``lookback`` rows.
    * 'Sell' - the close is below the EMA now and was above it on each of the
      previous ``lookback`` rows.
    * 'Hold' - anything else, including the first ``lookback`` rows of a
      frame, which do not have enough history.

    Args:
        frames: iterable of DataFrames with a ``close`` column and
            ``ema_column``.
        ema_column: name of the EMA column to compare the close against.
        lookback: number of preceding rows that must sit on the opposite
            side of the EMA.

    Returns:
        A flat list of 'Buy' / 'Sell' / 'Hold' strings, one per input row,
        concatenated in the order of ``frames``.
    """
    signals = []
    for frame in frames:
        # Plain arrays give positional access and avoid a Series label lookup
        # on every comparison.
        close = frame['close'].to_numpy()
        ema = frame[ema_column].to_numpy()
        # Never pad with more labels than the frame has rows, so the output
        # stays one-label-per-row even for very short frames.
        signals.extend(['Hold'] * min(lookback, len(close)))
        for n in range(lookback, len(close)):
            prior_close = close[n - lookback:n]
            prior_ema = ema[n - lookback:n]
            if close[n] > ema[n] and (prior_close < prior_ema).all():
                signals.append('Buy')
            elif close[n] < ema[n] and (prior_close > prior_ema).all():
                signals.append('Sell')
            else:
                signals.append('Hold')
    return signals


EMA100_ind = _ema_cross_signals(Stock, 'EMA100')
print(EMA100_ind)
print(len(EMA100_ind))

EMA45_ind = _ema_cross_signals(Stock, 'EMA45')
print(EMA45_ind)
print(len(EMA45_ind))

EMA15_ind = _ema_cross_signals(Stock, 'EMA15')
print(EMA15_ind)
print(len(EMA15_ind))
# Parabolic SAR signal: react when the SAR value switches sides of the close
# between the previous row and the current one.
SAR_ind = []
for frame in Stock:
    sar = frame['ParabolicSAR'].to_numpy()
    price = frame['close'].to_numpy()
    # The first row has no previous row to compare against.
    SAR_ind.append('Hold')
    for today in range(1, len(frame)):
        yesterday = today - 1
        moved_below_price = sar[today] < price[today] and sar[yesterday] > price[yesterday]
        moved_above_price = sar[today] > price[today] and sar[yesterday] < price[yesterday]
        if moved_below_price:
            SAR_ind.append('Buy')
        elif moved_above_price:
            SAR_ind.append('Sell')
        else:
            SAR_ind.append('Hold')
print(SAR_ind)
print(len(SAR_ind))
# Histogram signal: look at the sign of the current row and the two rows
# before it, and react when the sign changed two rows ago and has held since.
# NOTE(review): a histogram that turned negative maps to 'Buy' and one that
# turned positive maps to 'Sell' - confirm this direction is intended.
HIS_ind = []
for frame in Stock:
    hist = frame['Histogram'].to_numpy()
    # The first two rows of each frame lack the two-row history.
    HIS_ind.extend(['Hold', 'Hold'])
    for n in range(2, len(frame)):
        two_back, one_back, current = hist[n - 2], hist[n - 1], hist[n]
        if two_back > 0 and one_back < 0 and current < 0:
            HIS_ind.append('Buy')
        elif two_back < 0 and one_back > 0 and current > 0:
            HIS_ind.append('Sell')
        else:
            HIS_ind.append('Hold')
print(HIS_ind)
print(len(HIS_ind))
def _rsi_signal(rsi):
    """Map one RSI reading to 'Sell' (>= 85), 'Buy' (<= 20) or 'Hold'."""
    if rsi >= 85:
        return 'Sell'
    if rsi <= 20:
        return 'Buy'
    return 'Hold'


# One label per row, frames concatenated in the order of ``Stock``.
RSI_ind = [
    _rsi_signal(reading)
    for frame in Stock
    for reading in frame['RSI'].to_numpy()
]
print(RSI_ind)
print(len(RSI_ind))
# Stochastic signal: %K crossing %D after two rows on the other side, gated
# by the current %D level (<= 7 for 'Buy', >= 85 for 'Sell').
STO_ind = []
for frame in Stock:
    k_line = frame['percentK'].to_numpy()
    d_line = frame['percentD'].to_numpy()
    # The first two rows of each frame lack the two-row history.
    STO_ind += ['Hold', 'Hold']
    for n in range(2, len(frame)):
        k_was_below = k_line[n - 1] < d_line[n - 1] and k_line[n - 2] < d_line[n - 2]
        k_was_above = k_line[n - 1] > d_line[n - 1] and k_line[n - 2] > d_line[n - 2]
        if k_line[n] > d_line[n] and k_was_below and d_line[n] <= 7:
            STO_ind.append('Buy')
        elif k_line[n] < d_line[n] and k_was_above and d_line[n] >= 85:
            STO_ind.append('Sell')
        else:
            STO_ind.append('Hold')
print(STO_ind)
print(len(STO_ind))
# Bollinger-band signal: 'Buy' when the close is at or below the lower band,
# 'Sell' when it is at or above the upper band, otherwise 'Hold'.
#
# The comparison is inclusive-inequality rather than exact equality: two
# floating-point series practically never hold the identical value, so an
# `==` test would leave this feature stuck on 'Hold'.
# Rows where a band is NaN compare False on both tests and therefore stay
# 'Hold'.
BOLL_ind = []
for frame in Stock:
    closes = frame['close'].to_numpy()
    lower_band = frame['Lower'].to_numpy()
    upper_band = frame['Upper'].to_numpy()
    for price, floor, ceiling in zip(closes, lower_band, upper_band):
        if price <= floor:
            BOLL_ind.append('Buy')
        elif price >= ceiling:
            BOLL_ind.append('Sell')
        else:
            BOLL_ind.append('Hold')
print(BOLL_ind)
print(len(BOLL_ind))
# Volume signal: compare each row's Volume_MA with its Volume.
# 'Buy' when Volume_MA is larger, 'Sell' when Volume is larger, 'Hold' when
# they are equal (or not comparable, e.g. NaN).
OBV_ind = []
for frame in Stock:
    averages = frame['Volume_MA'].to_numpy()
    volumes = frame['Volume'].to_numpy()
    for average, volume in zip(averages, volumes):
        gap = average - volume
        if gap > 0:
            OBV_ind.append('Buy')
        elif gap < 0:
            OBV_ind.append('Sell')
        else:
            OBV_ind.append('Hold')
print(OBV_ind)
print(len(OBV_ind))
# PVT signal: row-over-row change in PVT relative to the current PVT value.
# Below -0.15 -> 'Sell', above 0.15 -> 'Buy', otherwise 'Hold'.
# NOTE(review): the change is divided by the *current* value, so a negative
# PVT flips the sign of the ratio and a zero PVT yields inf/NaN - confirm
# this is the intended definition.
PVT_ind = []
for frame in Stock:
    pvt = frame['PVT'].to_numpy()
    # The first row has no previous row to diff against.
    PVT_ind.append('Hold')
    for n in range(1, len(frame)):
        relative_change = (pvt[n] - pvt[n - 1]) / pvt[n]
        if relative_change < -0.15:
            PVT_ind.append('Sell')
        elif relative_change > 0.15:
            PVT_ind.append('Buy')
        else:
            PVT_ind.append('Hold')
print(PVT_ind)
print(len(PVT_ind))
# Output data: the label for row i is the direction of the NEXT close
# relative to the close of row i ('Buy' = higher, 'Sell' = lower,
# 'Hold' = unchanged).
#
# The labels must line up row-for-row with the indicator lists, which are
# built by walking ``Stock`` in order and emitting one entry per row. So the
# labels are built by walking ``Stock`` in the same order, and the last row
# of each frame (which has no next close) gets a ``None`` placeholder that is
# dropped once the table is assembled.
Real = []
for frame in Stock:
    closes = frame['close'].to_numpy()
    for today, tomorrow in zip(closes[:-1], closes[1:]):
        if tomorrow > today:
            Real.append('Buy')
        elif tomorrow < today:
            Real.append('Sell')
        else:
            Real.append('Hold')
    if len(closes):
        Real.append(None)  # final row of this frame: no next close to compare
print(Real)

data = pd.DataFrame(
    list(zip(EMA100_ind, EMA45_ind, EMA15_ind, SAR_ind, HIS_ind, RSI_ind,
             STO_ind, BOLL_ind, OBV_ind, PVT_ind, Real)),
    columns=['EMA100', 'EMA45', 'EMA15', 'ParabolicSAR', 'Histogram',
             'Relative Strength Index', 'Stochastic Oscillator',
             'Bollinger bands', 'On-Balance Volume [OBV] MA Cross Over',
             'Price Volume Trend', 'Output'],
)
# Discard the rows that have no label (the last row of each stock).
data = data.dropna(subset=['Output']).reset_index(drop=True)
data

# Change value from string to integer
# -1 = Buy, 0 = Hold, 1 = Sell
_SIGNAL_CODES = {'Buy': -1, 'Hold': 0, 'Sell': 1}
# ``map`` leaves NaN for any unexpected label, so the integer cast fails
# loudly instead of letting a stray string reach the classifier.
data = data.apply(lambda column: column.map(_SIGNAL_CODES)).astype(int)
data
# Fixed seed so the split, the fitted forest and the reported metrics are
# reproducible from run to run.
RANDOM_STATE = 42

train_rows, test_rows = train_test_split(data, random_state=RANDOM_STATE)

# Labels are kept 1-D: scikit-learn classifiers expect y of shape
# (n_samples,) and emit a DataConversionWarning for a column vector.
Y_train = train_rows['Output'].to_numpy()
Y_test = test_rows['Output'].to_numpy()
X_train = train_rows.drop(columns=['Output'])
X_test = test_rows.drop(columns=['Output'])

clf = RandomForestClassifier(random_state=RANDOM_STATE).fit(X_train, Y_train)
Y_predict = clf.predict(X_test)

# The error metrics treat the class codes (-1 / 0 / 1) as plain numbers.
mse = mean_squared_error(Y_test, Y_predict)
print('Mean Square Error: ', mse)
print('Root Mean Square Error: ', np.sqrt(mse))
print('Score: ', clf.score(X_test, Y_test))
print(classification_report(Y_test, Y_predict))
# Feature importances of the fitted forest, ranked and plotted.
# The names come from the training matrix itself, so they are guaranteed to
# be in the same order as ``clf.feature_importances_``.
features = list(X_train.columns)
importances = clf.feature_importances_

# Ranked table (most important first); printed so it is visible when the
# file runs as a script rather than as a notebook cell.
ranked_importances = pd.Series(importances, index=features).sort_values(ascending=False)
print(ranked_importances)

# Spread of each feature's importance across the individual trees, used for
# the error bars.
std = np.std([tree.feature_importances_ for tree in clf.estimators_], axis=0)
indices = np.argsort(importances)[::-1]

positions = range(len(features))
plt.figure()
plt.title("Feature importances")
plt.bar(positions, importances[indices], color="b", yerr=std[indices], align="center")
# Label the bars with feature names instead of opaque column positions.
plt.xticks(positions, [features[i] for i in indices], rotation=45, ha="right")
plt.xlim([-1, len(features)])
plt.tight_layout()
plt.show()