!pip install -q tensorflow yfinance
import yfinance as yf
import pandas as pd
import numpy as np
from datetime import timedelta
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM
# Fetch historical stock data for the S&P 500 index ('^GSPC') between the two dates below
ticker = '^GSPC'
start_date = '2010-01-01'
end_date = '2023-09-02'
data = yf.download(ticker, start=start_date, end=end_date)
# A failed download yields an empty frame rather than an exception; stop here with a
# clear message instead of failing later inside the scaler.
if data.empty:
    raise RuntimeError(f"No price data returned for {ticker!r} between {start_date} and {end_date}")
# Recent yfinance releases return two-level (field, ticker) columns even for a single
# ticker. That turns data['Close'] into a one-column DataFrame instead of a Series and
# breaks code that expects 1-D values. Collapse to the field level so the rest of the
# script sees plain 'Close', 'Open', ... columns regardless of the yfinance version.
if isinstance(data.columns, pd.MultiIndex):
    data.columns = data.columns.get_level_values(0)
data.head()
# Extract 'Close' prices as an (n_samples, 1) column array
close_data = data['Close'].values.reshape(-1, 1)
# Chronological 80/20 split: the first 80% of rows train the model, the rest evaluate it
train_size = int(len(close_data) * 0.8)
test_size = len(close_data) - train_size
# Fit the scaler on the training rows only. Fitting on the whole series would leak the
# test period's min/max into training and make the evaluation look better than it is.
# As a consequence, scaled test values can fall outside [0, 1]; that is expected.
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(close_data[:train_size])
scaled_data = scaler.transform(close_data)
# Create training and test datasets from the scaled series
train_data, test_data = scaled_data[:train_size], scaled_data[train_size:]
# Turn each series into supervised samples: every input is a run of 60 consecutive
# scaled values and its target is the value immediately after that run.
X_train = np.array([train_data[stop - 60:stop, 0] for stop in range(60, len(train_data))])
y_train = np.array(train_data[60:, 0])
X_test = np.array([test_data[stop - 60:stop, 0] for stop in range(60, len(test_data))])
y_test = np.array(test_data[60:, 0])
# Add a trailing feature axis so the inputs are (samples, timesteps, 1) for the LSTM
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)
X_train.shape, X_test.shape
# Assemble the network: two stacked LSTM layers followed by a small dense head
model = Sequential([
    # return_sequences=True hands the full sequence to the next LSTM layer
    LSTM(units=50, activation='relu', return_sequences=True, input_shape=(X_train.shape[1], 1)),
    # The second LSTM returns only its final output
    LSTM(units=50, activation='relu', return_sequences=False),
    Dense(units=25),
    # Single output unit: the predicted next (scaled) closing value
    Dense(units=1),
])
# Train with Adam against mean squared error
model.compile(loss='mean_squared_error', optimizer='adam')
# Print the layer-by-layer architecture
model.summary()
# Fit for 25 epochs in mini-batches of 64. The test windows are passed as validation
# data so their loss is reported alongside the training loss after each epoch.
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=64,
    epochs=25,
    verbose=1,
    validation_data=(X_test, y_test),
)
# Run the trained model over the test windows
predictions = model.predict(X_test)
# Undo the min-max scaling so the predictions are back in price units
predictions = scaler.inverse_transform(predictions.reshape(-1, 1))
# The first 60 test rows serve only as look-back context, so the first target sits
# 60 rows after the start of the test span.
actual_close = data['Close'].iloc[train_size + 60:]
test_data_range = actual_close.index
# Side-by-side table of actual vs. predicted closes, indexed by date
comparison_df = pd.DataFrame(
    {'Actual': actual_close.values, 'Predicted': predictions.squeeze()},
    index=test_data_range,
)
comparison_df.head()
from sklearn.metrics import mean_squared_error
from math import sqrt
# Root-mean-squared error between actual and predicted closes, in price units
mse = mean_squared_error(y_true=comparison_df['Actual'], y_pred=comparison_df['Predicted'])
rmse = sqrt(mse)
rmse
# Draw actual vs. predicted closes over the test period on a single set of axes
fig, ax = plt.subplots(figsize=(16, 8))
ax.set_title('S&P 500 Price Prediction')
ax.set_xlabel('Date')
ax.set_ylabel('Close Price (USD)')
ax.plot(comparison_df['Actual'], label='Actual', color='blue')
ax.plot(comparison_df['Predicted'], label='Predicted', color='red')
ax.legend(loc='upper left')
plt.show()
# Initialize variables for future prediction
future_days = 120  # Number of trading days to predict into the future
future_predictions = []
last_60_days_scaled = scaled_data[-60:]  # Most recent 60 scaled closes, shape (60, 1)
# Recursive forecast: every prediction is pushed into the window and becomes input for
# the next step, so any error made early on is carried into all later steps.
for _ in range(future_days):
    last_60_days_reshaped = np.reshape(last_60_days_scaled, (1, 60, 1))
    # verbose=0: otherwise each of the 120 single-sample calls prints its own progress bar
    next_day_prediction_scaled = model.predict(last_60_days_reshaped, verbose=0)
    # Convert the scaled prediction back to price units before storing it
    next_day_prediction = scaler.inverse_transform(next_day_prediction_scaled)[0][0]
    future_predictions.append(next_day_prediction)
    # Slide the window: drop the oldest value, append the newest (still scaled) prediction
    last_60_days_scaled = np.append(last_60_days_scaled[1:], next_day_prediction_scaled, axis=0)
# Each forecast step corresponds to one trading day, so index the results on business
# days rather than consecutive calendar days (which would place prices on weekends).
# NOTE: exchange holidays are not excluded; only Saturdays and Sundays are skipped.
future_dates = pd.bdate_range(start=data.index[-1] + pd.offsets.BDay(1), periods=future_days)
# Create a DataFrame to store the future predictions
future_predictions_df = pd.DataFrame(future_predictions, columns=['Predicted'], index=future_dates)
future_predictions_df.head()
# Draw the full historical close series followed by the forecast on a single set of axes
fig, ax = plt.subplots(figsize=(16, 8))
ax.set_title('S&P 500 Future Price Prediction')
ax.set_xlabel('Date')
ax.set_ylabel('Close Price (USD)')
ax.plot(data['Close'], label='Historical', color='blue')
ax.plot(future_predictions_df['Predicted'], label='Future Predicted', color='red')
ax.legend(loc='upper left')
plt.show()