Neural Networks with TensorFlow for stock trend & price prediction: Microsoft stock
!pip install yfinance
Loading in the data
import numpy as np
import pandas as pd
import seaborn as sns
import io
import yfinance as yf
import matplotlib.pyplot as plt
import random
import time
from collections import deque
from sklearn import preprocessing
# Load in Microsoft Stock Data
# Ticker handle for the 'MSFT' symbol.
msft = yf.Ticker('MSFT')
# Longest available history at a one-day interval.
history = msft.history(period='max', interval='1d')
# Closing-price column on its own.
hist_data = history['Close']
# Working frame that the following cells slice, plot and extend.
df = pd.DataFrame(history)
# Preview of the first rows (notebook cell output).
df.head()
Data visualisation
# Rows between 3 January 2020 and 1 October 2021.
period_2y = df['2020-01-03':'2021-10-01']

plt.rcParams['figure.figsize'] = (20, 10)
plt.style.use('fivethirtyeight')

# One semi-transparent scatter layer each for the daily high, low and
# opening price, drawn in that order.
for column, colour in (('High', 'darkgreen'), ('Low', 'darkred'), ('Open', 'grey')):
    plt.scatter(
        period_2y.index,
        period_2y[column],
        color=colour,
        marker='o',
        alpha=0.5,
        label=column,
    )

# Closing price as a line drawn over the scatter layers.
plt.plot(
    period_2y.index,
    period_2y['Close'],
    color='dimgrey',
    marker=None,
    label='Close',
)

plt.ylabel('Price in USD')
plt.xlabel('Date')
plt.legend(loc="upper left")
plt.show()
# Rows between 3 January 2016 and 1 October 2021.
period_5y = df['2016-01-03':'2021-10-01']

plt.rcParams['figure.figsize'] = (20, 10)
plt.style.use('fivethirtyeight')

# Closing price only, as a single line.
close_line_style = dict(color='dimgrey', marker=None, label='Close')
plt.plot(period_5y.index, period_5y['Close'], **close_line_style)

plt.ylabel('Price in USD')
plt.xlabel('Date')
plt.legend(loc="upper left")
plt.show()
# Convert the index to datetimes so the date-string slices below select rows.
df.index = pd.to_datetime(df.index)
period_25y = df.loc['1996-01-03':'2021-07-28']
period_25y_hist = period_25y['Close']

plt.style.use('fivethirtyeight')
plt.figure(figsize=(30, 5))

# Plot the closing price only. Passing the whole frame would draw every
# column (Volume included) on the same axis and flatten the price line.
plt.plot(period_25y_hist, color='dimgrey')

# Split the period into 20 consecutive chunks and mark the highest close of
# each chunk with a red dot. The split is done on row positions so that each
# chunk stays a pandas Series, and the peak is looked up inside its own chunk
# so an equal price elsewhere in the period is not marked as well.
for positions in np.array_split(np.arange(len(period_25y_hist)), 20):
    if positions.size == 0:
        # Fewer rows than chunks: nothing to mark for this chunk.
        continue
    item = period_25y_hist.iloc[positions]
    max_price = item.max()
    max_price_loc = item[item == max_price]
    plt.plot(max_price_loc, marker='o', linestyle='', color='firebrick')

plt.xlabel('Year')
plt.ylabel('Stock price')
plt.show()
# Yearly revenue figures (billion US$), one value per entry in `year`.
revenue = [
    28.37, 32.19, 36.84, 39.79, 44.28,
    51.12, 60.42, 58.44, 62.48, 69.94,
    73.72, 77.85, 86.83, 93.58, 85.32,
    96.57, 110.36, 125.84, 143.02, 168.09,
]
# Labels '2002' through '2021' as strings.
year = [str(value) for value in range(2002, 2022)]

# Price history for the matching period.
period_20y = df.loc['2000-01-03':'2021-07-28']
period_20y_hist = period_20y['Close']

plt.style.use('fivethirtyeight')
plt.figure(figsize=(10, 5))

# One bar per year.
bar_style = dict(color='dimgrey', width=0.5, linewidth=0.5, edgecolor='teal')
plt.bar(year, revenue, **bar_style)

plt.xlabel('Year')
plt.xticks(rotation=70)
plt.ylabel('Revenue in Billion US$')
plt.show()
Demand and traded volume of stock
# Look at the traded volume to judge whether it adds useful information.
volume = history['Volume']
volume_df = pd.DataFrame(volume)
volume_df = volume_df.dropna()
volume_df_15yr = volume_df.loc['2005-01-03':'2021-07-28']
volume_df_5yr = volume_df.loc['2015-01-03':'2021-07-28']

# No plt.clf() here: with no figure open it would first create an empty
# figure that is then shown alongside the real one.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(25, 5))

# Left panel: volume over the complete history.
ax1.plot(
    volume_df.index,
    volume_df['Volume'],
    color='dimgrey',
    linewidth=1
)
ax1.set_xlabel('Years')

# Right panel: volume from 2015 onwards.
ax2.plot(
    volume_df_5yr.index,
    volume_df_5yr['Volume'],
    color='dimgrey',
    linewidth=1
)
ax2.set_xlabel('Years')

plt.show()
# Price rows from 2015 onwards, without incomplete rows.
period_5y = df.loc['2015-01-03':'2021-07-28']
period_5y = period_5y.dropna()

fig, ax = plt.subplots(figsize=(25, 5))

# Left axis: traded volume.
volume_line, = ax.plot(
    volume_df_5yr.index,
    volume_df_5yr['Volume'],
    color='dimgrey',
    linewidth=2,
    label='Volume traded'
)
ax.set_ylabel('Volume traded')
ax.set_xlabel('Time in years ')

# Right axis: closing price. It is plotted against its own index because the
# price and volume frames are cleaned separately and may differ in length.
ax2 = ax.twinx()
price_line, = ax2.plot(
    period_5y.index,
    period_5y['Close'],
    color='coral',
    linewidth=2,
    label='Stock price'
)
ax2.set_ylabel('Stock price')

# The two lines live on different axes, so pass both handles explicitly to
# get a single legend that lists them together.
ax2.legend(handles=[volume_line, price_line], loc='upper left')
plt.show()
Correlations
# Correlation matrix of all columns over the complete history.
plt.style.use('ggplot')
plt.figure(figsize=(10, 10))
full_history_corr = history.corr()
sns.heatmap(full_history_corr, annot=True, linewidths=1)
plt.show()

# Same matrix restricted to 3 January 2020 - 1 October 2021.
plt.style.use('ggplot')
plt.figure(figsize=(10, 10))
period_2y = df['2020-01-03':'2021-10-01']
recent_corr = period_2y.corr()
sns.heatmap(recent_corr, annot=True, linewidths=1)
plt.show()
Simple moving averages and relative strength index
!wget http://prdownloads.sourceforge.net/ta-lib/ta-lib-0.4.0-src.tar.gz
!tar -xzvf ta-lib-0.4.0-src.tar.gz
%cd ta-lib
!./configure --prefix=/usr
!make
!make install
!pip install Ta-Lib
import talib
feature_names = []  # names of the indicator columns, collected for later

# For each look-back window (5, 30 and 600 rows) add two columns:
#   ma<n>  - simple moving average of the close divided by the close itself
#   rsi<n> - relative strength index of the close
close_values = df['Close'].values
for n in [5, 30, 600]:
    ma_column = 'ma{}'.format(n)
    rsi_column = 'rsi{}'.format(n)

    df[ma_column] = talib.SMA(close_values, timeperiod=n) / df['Close']
    df[rsi_column] = talib.RSI(close_values, timeperiod=n)

    feature_names = feature_names + [ma_column, rsi_column]

print(feature_names)

# Drop every row with a missing value (presumably the indicator warm-up
# rows at the start of the history).
df = df.dropna()
print(df.head())
Data preparation
# All constant variables go here
# Number of consecutive rows in one input window (read by preprocess_df).
seq_len = 60
# NOTE(review): not referenced in the code shown here; the 'future' column is
# built with a hard-coded shift of 5 rows instead - confirm the intended horizon.
future_period_predict = 3
# NOTE(review): not referenced in the code shown here and names a different
# instrument than the MSFT data loaded above - looks like a leftover, confirm.
ratio_to_predict = 'ETH-USD'
# Number of epochs and batch size passed to model.fit.
epochs = 50
batch_size = 64
# Label built from the current Unix timestamp (whole seconds).
name = 'Model nr. {}'.format(int(time.time()))
def classify(current, future):
    """Return 1 when ``future`` is strictly greater than ``current``, else 0.

    Both arguments are converted with ``float()`` before the comparison, so
    numeric strings are accepted. A NaN on either side yields 0 because every
    comparison with NaN is false.
    """
    return 1 if float(future) > float(current) else 0
def preprocess_df(df):
    """Turn a labelled price frame into balanced, shuffled training windows.

    Steps:
      1. Rows whose ``future`` value is missing are removed (their label
         could not be derived from a real future price), then the ``future``
         column itself is dropped.
      2. Every column except ``target`` is replaced by its percentage change
         and standardised with ``preprocessing.scale``.
      3. A sliding window of ``seq_len`` consecutive rows is paired with the
         ``target`` of the window's last row.
      4. Windows are split by label (1 = buy, 0 = sell), both groups are
         truncated to the size of the smaller one, then merged and shuffled.

    Args:
        df: frame with a ``target`` column of 0/1 labels, optionally a
            ``future`` column, and feature columns. The caller's frame is
            not modified.

    Returns:
        Tuple ``(X, y)``: ``X`` is a float32 array of stacked windows and
        ``y`` the array of matching labels.
    """
    if 'future' in df.columns:
        # Keyword form: the positional axis argument of drop() is not
        # accepted by current pandas releases.
        df = df.dropna(subset=['future']).drop(columns='future')
    else:
        df = df.copy()

    for col in df.columns:
        if col != 'target':
            df[col] = df[col].pct_change()
            # pct_change leaves a NaN in the first row; remove it before scaling.
            df.dropna(inplace=True)
            df[col] = preprocessing.scale(df[col].values)
    df.dropna(inplace=True)

    # Diagnostic output of the prepared frame and its columns.
    print(df.head())
    for c in df.columns:
        print(c)

    # Select features and labels by name rather than relying on 'target'
    # being the last column.
    feature_rows = df.drop(columns='target').values
    target_values = df['target'].values.astype('float64')

    sequential_data = []
    prev_days = deque(maxlen=seq_len)
    for features, target in zip(feature_rows, target_values):
        prev_days.append(list(features))
        if len(prev_days) == seq_len:
            sequential_data.append([np.array(prev_days), target])
    random.shuffle(sequential_data)

    # Balance the classes so neither label dominates training.
    buys = [pair for pair in sequential_data if pair[1] == 1]
    sells = [pair for pair in sequential_data if pair[1] == 0]
    random.shuffle(buys)
    random.shuffle(sells)
    lower = min(len(buys), len(sells))
    sequential_data = buys[:lower] + sells[:lower]
    random.shuffle(sequential_data)

    X = [seq for seq, _ in sequential_data]
    y = [target for _, target in sequential_data]
    return np.array(X).astype("float32"), np.array(y)
# Remove the dividend and stock-split columns from the working frame.
df.drop(['Dividends', 'Stock Splits'], axis=1, inplace=True)
df.head()

# 'future' is the close 5 rows ahead; 'target' is 1 when that close is higher
# than the current one, else 0. The last 5 rows have no future value and are
# therefore labelled 0 by classify.
# NOTE(review): future_period_predict is 3 but the shift here is 5 - confirm
# which horizon is intended.
df['future'] = df['Close'].shift(-5)
df['target'] = list(map(classify, df['Close'], df['future']))
df.head()

# Boundary between training and validation data: the timestamp that starts
# the final 5% of rows. Sorting the index itself (not index.values) keeps any
# timezone information, so the comparisons below also work on a tz-aware index.
times = sorted(df.index)
last_5pct = times[-int(0.05*len(times))]
print(last_5pct)

# Strict '<' on the training side so the boundary row is not in both sets.
validation_df = df[(df.index >= last_5pct)]
df = df[(df.index < last_5pct)]

X_train, y_train = preprocess_df(df)
X_test, y_test = preprocess_df(validation_df)
print(X_train.shape)
Creating a neural network
!pip install tensorflow
# Import Tensorflow and Keras
import tensorflow as tf
import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM, BatchNormalization, Flatten
from tensorflow.keras.callbacks import TensorBoard, ModelCheckpoint
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn import preprocessing
# Shape of one training sample (everything after the batch dimension).
sample_shape = X_train.shape[1:]

# Three stacked LSTM layers, each followed by dropout and batch
# normalisation, then a small dense head with a single linear output.
model = Sequential([
    LSTM(128, input_shape=(sample_shape), return_sequences=True, activation='tanh'),
    Dropout(0.1),
    BatchNormalization(),

    LSTM(128, input_shape=(sample_shape), return_sequences=True, activation='tanh'),
    Dropout(0.1),
    BatchNormalization(),

    LSTM(64, input_shape=(sample_shape), activation='tanh'),
    Dropout(0.1),
    BatchNormalization(),

    Dense(32, activation='relu'),
    Dropout(0.1),
    Dense(1, activation='linear'),
])

model.summary()
# Custom loss: squared error, weighted more heavily on a sign mismatch
def sign_penalty(y_true, y_pred):
    """Squared error that is tripled where ``y_true * y_pred`` is negative.

    Returns the mean of the per-element loss over the last axis.
    """
    penalty = 3.
    squared_error = tf.square(y_true - y_pred)
    opposite_sign = tf.less(y_true * y_pred, 0)
    weighted = tf.where(opposite_sign, penalty * squared_error, squared_error)
    return tf.reduce_mean(weighted, axis=-1)
# Add the loss function to keras
keras.losses.sign_penalty = sign_penalty

# `learning_rate` is the supported keyword; `lr` is a deprecated alias.
# NOTE(review): some newer Keras optimizers also reject `decay` - confirm
# against the installed version.
opti = tf.keras.optimizers.SGD(learning_rate=0.001, decay=1e-6)

model.compile(
    loss=sign_penalty,
    optimizer=opti,
    metrics=['accuracy']
)

# Train on the balanced windows; the held-out windows are used for validation.
# Note that this rebinds `history` from the price frame to the fit result.
history = model.fit(
    X_train, y_train,
    batch_size=batch_size,
    epochs=epochs,
    validation_data=(X_test, y_test),
)

import time
# Call time.time() so the name carries the timestamp rather than the repr of
# the function object.
# NOTE(review): newer Keras versions require a '.keras' or '.h5' suffix on the
# file name - confirm against the installed version.
model.save('Microsoft neural network model on {}'.format(int(time.time())))
Looking at the results and predicting the price
# Training loss per epoch, with the final value in the title.
training_loss = history.history['loss']

plt.figure(figsize=(10, 10))
plt.plot(training_loss)
plt.title('Loss:' + str(round(training_loss[-1], 6)))
plt.xlabel('epochs')
plt.ylabel('loss')
# The call parentheses were missing, so the figure was never explicitly shown.
plt.show()
from sklearn.metrics import r2_score

# Calc the score: evaluate returns [loss, accuracy] for the compiled model.
score = model.evaluate(X_test, y_test, verbose=0)
print('The loss of the model is: {}. Accuracy of the score is: {}'.format(round(score[0], 3), round(score[1], 3)))

# Predict on the validation windows
prediction = model.predict(X_test)
print(prediction)

# Make sure the predictions are a float. astype returns a new array, so the
# result has to be assigned back.
prediction = prediction.astype('float')

# Plot the predictions on a secondary axis
fig, ax = plt.subplots(figsize=(15, 5))
#ax.plot(X_test[:, 1], color='grey', lw=2)
ax2 = ax.twinx()
ax2.plot(prediction, color='green', lw=1)
plt.show()
Comparing the model to a baseline
# Closing prices of the five-year window, used for a straight-line baseline.
period_5y_close = period_5y['Close']
period_5y_close.head()

# Fit a first-degree polynomial against the row position 0..n-1.
x = np.arange(period_5y_close.index.size)
fit = np.polyfit(x, period_5y_close, deg=1)
slope, intercept = fit[0], fit[1]
print("Slope : " + str(slope))
print("Intercept : " + str(intercept))
fit_function = np.poly1d(fit)

dates = period_5y_close.index

# Fitted line first, then the actual closing prices.
plt.plot(dates, fit_function(x), color='darkred')
plt.plot(dates, period_5y_close, color='dimgrey')

# Axis labels
plt.xlabel('Date')
plt.ylabel('Price in USD')
plt.show()