Neural Networks with TensorFlow for stock trend & price prediction: Microsoft stock

# Install yfinance, the package imported as `yf` and used to download the price history.
!pip install yfinance

Loading in the data

import numpy as np import pandas as pd import seaborn as sns import io import yfinance as yf import matplotlib.pyplot as plt import random import time from collections import deque from sklearn import preprocessing

# Download the complete daily price history for the MSFT ticker.
msft = yf.Ticker('MSFT')
history = msft.history(interval='1d', period='max')

# Keep the closing prices on their own as well.
hist_data = history['Close']

# Work on a DataFrame built from the downloaded history and preview it.
df = pd.DataFrame(history)
df.head()

Data visualisation

# Restrict the data to the window 2020-01-03 .. 2021-10-01.
period_2y = df['2020-01-03':'2021-10-01']

plt.rcParams['figure.figsize'] = (20, 10)
plt.style.use('fivethirtyeight')

# One scatter layer per price column: the high, the low and the opening price.
scatter_layers = (
    ('High', 'darkgreen'),
    ('Low', 'darkred'),
    ('Open', 'grey'),
)
for column, colour in scatter_layers:
    plt.scatter(
        period_2y.index,
        period_2y[column],
        color=colour,
        marker='o',
        alpha=0.5,
        label=column,
    )

# Line layer for the closing price.
plt.plot(
    period_2y.index,
    period_2y['Close'],
    color='dimgrey',
    marker=None,
    label='Close',
)

plt.ylabel('Price in USD')
plt.xlabel('Date')
plt.legend(loc="upper left")
plt.show()

# Closing price for the window 2016-01-03 .. 2021-10-01.
period_5y = df['2016-01-03':'2021-10-01']

plt.rcParams.update({'figure.figsize': (20, 10)})
plt.style.use('fivethirtyeight')

# Line plot of the closing price.
plt.plot(
    period_5y.index,
    period_5y['Close'],
    color='dimgrey',
    marker=None,
    label='Close',
)

plt.xlabel('Date')
plt.ylabel('Price in USD')
plt.legend(loc="upper left")
plt.show()

# Make sure the index is a DatetimeIndex so rows can be sliced with date strings.
df.index = pd.to_datetime(df.index)

period_25y = df.loc['1996-01-03':'2021-07-28']
period_25y_hist = period_25y['Close']

plt.style.use('fivethirtyeight')
plt.figure(figsize=(30, 5))

# Plot the closing price only. Passing the whole frame would draw every column
# (including Volume) on the same axis and flatten the price line.
plt.plot(period_25y_hist, color='dimgrey')

# Split the period into 20 consecutive chunks and mark each chunk's highest
# close with a red dot. The split is done on row positions so that every chunk
# stays a date-indexed Series, and the peak is looked up inside its own chunk
# (an equal price elsewhere in the period is not marked by accident).
for positions in np.array_split(np.arange(len(period_25y_hist)), 20):
    item = period_25y_hist.iloc[positions]
    # An empty chunk has a NaN maximum, which selects nothing.
    max_price_loc = item[item == item.max()]
    plt.plot(max_price_loc, marker='o', color='firebrick')

plt.xlabel('Year')
plt.ylabel('Stock price')
plt.show()

# Revenue figures, one per year label below (plotted in billion US$).
revenue = [
    28.37, 32.19, 36.84, 39.79, 44.28,
    51.12, 60.42, 58.44, 62.48, 69.94,
    73.72, 77.85, 86.83, 93.58, 85.32,
    96.57, 110.36, 125.84, 143.02, 168.09,
]
# Year labels '2002' .. '2021', kept as strings so the bars are categorical.
year = [str(y) for y in range(2002, 2022)]

period_20y = df.loc['2000-01-03':'2021-07-28']
period_20y_hist = period_20y['Close']

plt.style.use('fivethirtyeight')
plt.figure(figsize=(10, 5))
plt.bar(
    year,
    revenue,
    color='dimgrey',
    width=0.5,
    linewidth=0.5,
    edgecolor='teal',
)
plt.xlabel('Year')
plt.xticks(rotation=70)
plt.ylabel('Revenue in Billion US$')
plt.show()

Demand and traded volume of stock

# Look at the traded volume to judge whether it adds useful information.
volume = history['Volume']
volume_df = pd.DataFrame(volume).dropna()

volume_df_15yr = volume_df['2005-01-03':'2021-07-28']
volume_df_5yr = volume_df['2015-01-03':'2021-07-28']

plt.clf()
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(25, 5))

# Left panel: full volume history. Right panel: the window from 2015 onwards.
for axis, frame in ((ax1, volume_df), (ax2, volume_df_5yr)):
    axis.plot(frame.index, frame['Volume'], color='dimgrey', linewidth=1)
    axis.set_xlabel('Years')

plt.show()

# Overlay traded volume (left axis) and closing price (right axis) from 2015 onwards.
period_5y = df['2015-01-03':'2021-07-28']
period_5y = period_5y.dropna()

plt.clf()
fig, ax = plt.subplots(figsize=(25, 5))

volume_line, = ax.plot(
    volume_df_5yr.index,
    volume_df_5yr['Volume'],
    color='dimgrey',
    linewidth=2,
    label='Volume traded',
)
ax.set_ylabel('Volume traded')
ax.set_xlabel('Time in years ')

# Second y-axis sharing the same x-axis, because price and volume have very
# different scales.
ax2 = ax.twinx()
# Use period_5y's own index for x: after dropna() its length may differ from
# volume_df_5yr, and mismatched x/y lengths make plot() fail.
price_line, = ax2.plot(
    period_5y.index,
    period_5y['Close'],
    color='coral',
    linewidth=2,
    label='Stock price',
)
ax2.set_ylabel('Stock price')

# A bare legend() only sees labelled artists on the current axes, and none of
# the lines had labels. Pass both handles explicitly so both lines are listed.
ax2.legend(handles=[volume_line, price_line], loc='upper left')
plt.show()

Correlations

# Annotated heatmap of the pairwise column correlations over the full history.
plt.style.use('ggplot')
plt.figure(figsize=(10, 10))

correlation_matrix = history.corr()
sns.heatmap(correlation_matrix, annot=True, linewidths=1)

plt.show()

# Same correlation heatmap, restricted to the window 2020-01-03 .. 2021-10-01.
plt.style.use('ggplot')
plt.figure(figsize=(10, 10))

period_2y = df['2020-01-03':'2021-10-01']
recent_correlations = period_2y.corr()
sns.heatmap(recent_correlations, annot=True, linewidths=1)

plt.show()

Simple moving averages and relative strength index

!wget http://prdownloads.sourceforge.net/ta-lib/ta-lib-0.4.0-src.tar.gz !tar -xzvf ta-lib-0.4.0-src.tar.gz %cd ta-lib !./configure --prefix=/usr !make !make install !pip install Ta-Lib import talib

# Names of the engineered indicator columns, collected for later use.
feature_names = []

# For each look-back window (5, 30 and 600 rows) add two columns:
#   ma<n>  - simple moving average of Close divided by Close
#   rsi<n> - relative strength index of Close
for n in [5, 30, 600]:
    ma_column = 'ma{}'.format(n)
    rsi_column = 'rsi{}'.format(n)

    df[ma_column] = talib.SMA(df['Close'].values, timeperiod=n) / df['Close']
    df[rsi_column] = talib.RSI(df['Close'].values, timeperiod=n)

    feature_names.extend([ma_column, rsi_column])

print(feature_names)

# Remove every row that contains a missing value, then preview the result.
df = df.dropna()
print(df.head())

Data preparation

# Notebook-wide settings.

# Number of consecutive rows in one input sequence.
seq_len = 60
# NOTE(review): not referenced in the code visible here; the label horizon
# further down is hard-coded - confirm which value is intended.
future_period_predict = 3
# NOTE(review): not referenced in the code visible here, and the value does not
# match the MSFT ticker this notebook loads - confirm whether it is still needed.
ratio_to_predict = 'ETH-USD'

# Training settings passed to model.fit.
epochs = 50
batch_size = 64

# Run label that embeds the current Unix time in whole seconds.
run_started = int(time.time())
name = 'Model nr. {}'.format(run_started)

def classify(current, future):
    """Return 1 when ``future`` is strictly greater than ``current``, else 0.

    Both arguments are converted with ``float`` before the comparison, so a
    NaN ``future`` compares false and yields 0.
    """
    return 1 if float(future) > float(current) else 0


def _scale_features(df):
    """Replace every non-target column by its standardised percentage change, in place."""
    for col in df.columns:
        if col != 'target':
            df[col] = df[col].pct_change()
            # A zero in the previous row turns the percentage change into
            # +/-inf, which dropna() does not remove; treat it as missing so
            # it cannot reach the scaler.
            df[col] = df[col].replace([np.inf, -np.inf], np.nan)
            df.dropna(inplace=True)
            df[col] = preprocessing.scale(df[col].values)
    df.dropna(inplace=True)


def _build_sequences(df):
    """Pair every full window of ``seq_len`` feature rows with the target of its last row.

    The target is read from the LAST column of ``df``; all other columns are
    treated as features.
    """
    sequential_data = []
    prev_days = deque(maxlen=seq_len)
    for row in df.values:
        prev_days.append(list(row[:-1]))
        if len(prev_days) == seq_len:
            sequential_data.append([np.array(prev_days), row[-1]])
    return sequential_data


def _balance_classes(sequential_data):
    """Return a shuffled list holding equally many target==1 and target==0 samples."""
    buys = [[seq, target] for seq, target in sequential_data if target == 1]
    sells = [[seq, target] for seq, target in sequential_data if target == 0]
    random.shuffle(buys)
    random.shuffle(sells)

    # Truncate the larger class to the size of the smaller one.
    lower = min(len(buys), len(sells))
    balanced = buys[:lower] + sells[:lower]
    random.shuffle(balanced)
    return balanced


def preprocess_df(df):
    """Turn a labelled price frame into balanced, shuffled training sequences.

    Args:
        df: DataFrame with a ``future`` column (dropped here), a ``target``
            column that must be the last column once ``future`` is removed,
            and numeric feature columns. The caller's frame is not modified.

    Returns:
        Tuple ``(X, y)``: ``X`` is a float32 array of stacked windows
        (samples x ``seq_len`` x features) and ``y`` the array of matching
        targets.
    """
    # Keyword form: the positional axis argument (``df.drop('future', 1)``)
    # is rejected by pandas 2.x.
    df = df.drop(columns='future')
    _scale_features(df)

    print(df.head())
    for c in df.columns:
        print(c)

    sequential_data = _build_sequences(df)
    random.shuffle(sequential_data)  # For np objects, use np.random.shuffle instead!
    sequential_data = _balance_classes(sequential_data)

    X = [seq for seq, _ in sequential_data]
    y = [target for _, target in sequential_data]
    return np.array(X).astype("float32"), np.array(y)

# Remove the dividend and stock-split columns in place, then preview the frame.
unused_columns = ['Dividends', 'Stock Splits']
df.drop(columns=unused_columns, inplace=True)
df.head()

# 'future' is the closing price 5 rows ahead; 'target' is 1 when that price is
# higher than the current close and 0 otherwise (see classify).
# NOTE(review): the horizon is hard-coded to 5 while the constants cell defines
# future_period_predict = 3 - confirm which value is intended.
df['future'] = df['Close'].shift(-5)
df['target'] = list(map(classify, df['Close'], df['future']))

# The last 5 rows have no future close (NaN). classify() labels those rows 0
# even though their outcome is unknown, so drop them rather than letting
# them act as "price did not rise" samples.
df = df[df['future'].notna()]

df.head()

# Timestamp at which the most recent 5 % of the rows begin. Taking it from the
# index itself (instead of df.index.values) keeps any timezone information, so
# the comparisons below also work on a timezone-aware index.
times = df.index.sort_values()
last_5pct = times[-int(0.05 * len(times))]
print(last_5pct)

# Chronological split: everything from last_5pct onwards is held out for
# validation. The training side uses a strict '<' so the boundary row does not
# end up in both sets.
validation_df = df[df.index >= last_5pct]
df = df[df.index < last_5pct]

# Build the model inputs for each side independently.
X_train, y_train = preprocess_df(df)
X_test, y_test = preprocess_df(validation_df)
print(X_train.shape)

Creating a neural network

# Install TensorFlow, which is imported in the next cell.
!pip install tensorflow

# Import Tensorflow and Keras import tensorflow as tf import keras from tensorflow.keras.models import Sequential from tensorflow.keras.layers import Dense, Dropout, LSTM, BatchNormalization, Flatten from tensorflow.keras.callbacks import TensorBoard, ModelCheckpoint from tensorflow.keras.preprocessing.sequence import pad_sequences from sklearn import preprocessing

# Stacked LSTM network: two sequence-returning LSTM layers, one final LSTM
# layer, a small dense layer and a single linear output unit. Every LSTM is
# followed by dropout and batch normalisation.
# NOTE(review): the output is linear while the targets built earlier are 0/1
# labels - confirm this is intended.
model = Sequential([
    LSTM(128, input_shape=X_train.shape[1:], return_sequences=True, activation='tanh'),
    Dropout(0.1),
    BatchNormalization(),

    LSTM(128, input_shape=X_train.shape[1:], return_sequences=True, activation='tanh'),
    Dropout(0.1),
    BatchNormalization(),

    LSTM(64, input_shape=X_train.shape[1:], activation='tanh'),
    Dropout(0.1),
    BatchNormalization(),

    Dense(32, activation='relu'),
    Dropout(0.1),

    Dense(1, activation='linear'),
])

model.summary()

def sign_penalty(y_true, y_pred):
    """Squared error that is tripled where ``y_true * y_pred`` is negative.

    Args:
        y_true: tensor of target values.
        y_pred: tensor of model outputs.

    Returns:
        The (possibly penalised) squared error, averaged over the last axis.

    NOTE(review): with the 0/1 targets built earlier in this notebook the
    product is negative only when the target is 1 and the prediction is
    below zero - confirm that this is the intended penalty.
    """
    penalty = 3.
    squared_error = tf.square(y_true - y_pred)
    opposite_sign = tf.less(y_true * y_pred, 0)
    loss = tf.where(opposite_sign, penalty * squared_error, squared_error)
    return tf.reduce_mean(loss, axis=-1)


# Expose the loss as an attribute of keras.losses.
keras.losses.sign_penalty = sign_penalty

# Plain SGD starting at a learning rate of 0.001. The `lr=` and `decay=`
# constructor arguments are deprecated/rejected by current Keras optimizers,
# so the same time-based decay, lr / (1 + 1e-6 * step), is expressed as an
# InverseTimeDecay schedule passed through `learning_rate=`.
lr_schedule = tf.keras.optimizers.schedules.InverseTimeDecay(
    initial_learning_rate=0.001,
    decay_steps=1,
    decay_rate=1e-6,
)
opti = tf.keras.optimizers.SGD(learning_rate=lr_schedule)

# NOTE(review): 'accuracy' is reported for a single linear output unit -
# confirm the metric is meaningful for this model.
model.compile(
    loss=sign_penalty,
    optimizer=opti,
    metrics=['accuracy'],
)

# Train the network and keep the object returned by fit(); its per-epoch
# losses are plotted further down.
# Note: from here on `history` no longer refers to the downloaded price frame
# that carried the same name at the top of the notebook.
history = model.fit(
    x=X_train,
    y=y_train,
    epochs=epochs,
    batch_size=batch_size,
    validation_data=(X_test, y_test),
)

import time

# Tag the saved model with the current Unix time in whole seconds.
# time.time has to be *called*: formatting the bare function put its repr
# ("<built-in function time>") into the file name. Whole seconds also keep a
# fractional part from looking like a file extension.
# NOTE(review): recent Keras releases expect a `.keras` or `.h5` suffix on the
# path - add one if save() rejects this name.
model.save('Microsoft neural network model on {}'.format(int(time.time())))

Looking at the results and predicting the price

# Training loss per epoch; the title shows the loss of the final epoch.
plt.figure(figsize=(10, 10))
plt.plot(history.history['loss'])
plt.title('Loss:' + str(round(history.history['loss'][-1], 6)))
plt.xlabel('epochs')
plt.ylabel('loss')
# show must be called; the bare name `plt.show` only evaluates to the function
# object and never renders the figure outside inline notebook backends.
plt.show()

from sklearn.metrics import r2_score

# Evaluate on the held-out data. The first two entries of the result are
# reported as loss and accuracy, matching the loss and the single metric the
# model was compiled with.
score = model.evaluate(X_test, y_test, verbose=0)

summary_template = 'The loss of the model is: {}. Accuracy of the score is: {}'
print(summary_template.format(round(score[0], 3), round(score[1], 3)))

# Run the trained model on the held-out sequences and print the raw outputs.

prediction = model.predict(X_test)

print(prediction)

# Make sure the predictions are floats. astype() returns a new array, so the
# result has to be assigned back; the bare call had no effect.
prediction = prediction.astype('float')

# Plot the predictions on a secondary y-axis.
# NOTE(review): the primary axis `ax` is left empty; the original cell carried
# a disabled plot of X_test[:, 1] there - decide what the predictions should
# be compared against.
fig, ax = plt.subplots(figsize=(15, 5))
ax2 = ax.twinx()
ax2.plot(prediction, color='green', lw=1)
plt.show()

Comparing the model to a baseline

# Closing prices of period_5y (as last defined above) are the baseline data.
period_5y_close = period_5y['Close']
period_5y_close.head()

# Use the row position 0, 1, 2, ... as the independent variable.
x = np.arange(len(period_5y_close))

# Degree-1 least-squares fit; np.polyfit returns the coefficients with the
# highest power first, i.e. [slope, intercept].
fit = np.polyfit(x, period_5y_close, deg=1)
slope, intercept = fit[0], fit[1]
print("Slope : " + str(slope))
print("Intercept : " + str(intercept))

# Wrap the fitted coefficients in a callable polynomial.
fit_function = np.poly1d(fit)
dates = period_5y_close.index

# Fitted straight line.
plt.plot(dates, fit_function(x), color='darkred')
# Actual closing prices.
plt.plot(dates, period_5y_close, color='dimgrey')

plt.xlabel('Date')
plt.ylabel('Price in USD')
plt.show()

Neural Networks with TensorFlow for stock trend & price prediction: Microsoft stock