!pip install pandas seaborn statsmodels
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# --- Load the raw time-series CSVs and normalize their 'Date' columns ---

# Gold price history, 1968-2023; 'Close' is the price series we model.
gold_price = pd.read_csv('gold price(1968-2023).csv')
gold_price['Date'] = pd.to_datetime(gold_price['Date'], format="%d/%m/%Y")
gold_price = gold_price.rename(columns={'Close': 'Gold Price'})
print(gold_price.head())

# US Consumer Price Index, monthly.
cpi = pd.read_csv('US CPI monthly.csv')
cpi['Date'] = pd.to_datetime(cpi['Date'], format="%d/%m/%Y")
cpi = cpi.rename(columns={'Close': 'CPI'})
print(cpi.head())

# US M2 money supply, monthly; its 'Close' values carry T/B magnitude
# suffixes and are converted to floats further below.
money_supply = pd.read_csv('US Money Supply (M2) monthly.csv')
money_supply['Date'] = pd.to_datetime(money_supply['Date'], format="%d/%m/%Y")
def convert_to_float(value):
    """Convert a magnitude-suffixed string (e.g. '2.1T', '350B') to a float.

    'T' means trillions (1e12) and 'B' means billions (1e9). Any other
    string is parsed as a plain float, and already-numeric values (including
    NaN) pass through unchanged, so the function is safe under
    ``Series.apply`` on partially-parsed columns.

    Raises ValueError for strings that are not numeric after suffix removal.
    """
    # Pass through values pandas has already parsed as numbers (incl. NaN).
    if isinstance(value, (int, float)):
        return float(value)
    value = value.strip()
    if value.endswith('T'):
        return float(value[:-1]) * 1e12
    if value.endswith('B'):
        return float(value[:-1]) * 1e9
    # Fix: the original fell off the end here and implicitly returned None,
    # silently turning unsuffixed values into NaN downstream.
    return float(value)
# Scale the suffixed money-supply figures to raw dollars, then rename.
money_supply['Close'] = money_supply['Close'].apply(convert_to_float)
money_supply = money_supply.rename(columns={'Close': 'Money Supply'})
print(money_supply.head())

# Effective federal funds (overnight) rate.
interest_rate = pd.read_csv('US OVERNIGHT FED FUNDS EFFECT.csv')
interest_rate['Date'] = pd.to_datetime(interest_rate['Date'], format="%d/%m/%Y")
interest_rate = interest_rate.rename(columns={'Close': 'Interest Rate %'})
print(interest_rate.head())

# S&P 500 index; note the different date format and the thousands
# separators that must be stripped before the cast to float.
sp500 = pd.read_csv('S&P 500 Index.csv')
sp500['Date'] = pd.to_datetime(sp500['Date'], format="%d-%b-%Y")
sp500 = sp500.rename(columns={'Close': 'S&P 500'})
sp500['S&P 500'] = sp500['S&P 500'].str.replace(',', '').astype(float)
print(sp500.head())

# Yearly world GDP, also using T/B magnitude suffixes.
world_gdp = pd.read_csv('World GDP yearly.csv')
world_gdp['Date'] = pd.to_datetime(world_gdp['Date'], format="%d/%m/%Y")
world_gdp['Close'] = world_gdp['Close'].apply(convert_to_float)
world_gdp = world_gdp.rename(columns={'Close': 'World GDP'})
print(world_gdp.head())
# World Uncertainty Index, quarterly, with quarters encoded like '1990q1'.
world_uncertainty = pd.read_csv('uncertainty.csv')
# Rewriting 'q' as '-Q' ('1990-Q1') lets pd.to_datetime parse the quarter.
world_uncertainty['Year'] = pd.to_datetime(world_uncertainty['Year'].str.replace('q', '-Q'))
# Rename the 'Year' column to 'Date' to match the other dataframes.
world_uncertainty = world_uncertainty.rename(columns={'Year': 'Date'})
# Drop the duplicated year column if the export included one.
world_uncertainty = world_uncertainty.drop(columns=['Year.1'], errors='ignore')
# Sort newest-first with a clean positional index.
world_uncertainty = world_uncertainty.sort_values(by='Date', ascending=False).reset_index(drop=True)
# These columns arrive as strings with thousands separators; coerce to float.
# Fix: removed the unused 'string_columns' computation that was dead code.
columns_to_convert = ['Global (simple average)', 'Global (GDP weighted average)',
                      'Advanced economies', 'Emerging economies', 'Low-income economies']
for column in columns_to_convert:
    world_uncertainty[column] = world_uncertainty[column].str.replace(',', '').astype(float)
print(world_uncertainty.head())
# --- Align every series onto a common month-end index and merge ---

# Gold is daily; collapse it to monthly means up front.
gold_price_monthly = gold_price.resample('M', on='Date').mean()

dataframes = [gold_price_monthly, sp500, cpi, money_supply, interest_rate,
              world_gdp, world_uncertainty]

for df in dataframes:
    # Promote 'Date' to a DatetimeIndex where it is still a column.
    if 'Date' in df.columns:
        df['Date'] = pd.to_datetime(df['Date'])
        df.set_index('Date', inplace=True)
    # Force every remaining column to numeric; unparsable values become NaN.
    for col in df.columns:
        df[col] = pd.to_numeric(df[col], errors='coerce')

# Resample each series to monthly frequency and forward-fill the gaps
# (yearly GDP and quarterly uncertainty repeat until the next reading).
for i, df in enumerate(dataframes):
    df_resampled = df.resample('M').mean()
    # Fix: fillna(method='ffill') is deprecated in pandas 2.x; use .ffill().
    dataframes[i] = df_resampled.ffill()

# Join on the shared date index and keep only fully observed months.
df_combined = pd.concat(dataframes, axis=1)
df_cleaned_and_combined = df_combined.dropna()
print(df_cleaned_and_combined.tail(24))
# Overlay interest rates and gold prices on one chart with twin y-axes.
rate_ax = df_cleaned_and_combined['Interest Rate %'].plot(
    figsize=(10, 6), color='blue', label='Interest Rates')
gold_ax = rate_ax.twinx()
df_cleaned_and_combined['Gold Price'].plot(
    ax=gold_ax, color='gold', label='Gold Price USD/oz')
rate_ax.set_xlabel("Date")
gold_ax.set_ylabel("Gold Prices USD/oz")
rate_ax.set_title("Interest Rates and Gold Prices")
# Merge the handles from both axes into a single legend.
handles_left, labels_left = rate_ax.get_legend_handles_labels()
handles_right, labels_right = gold_ax.get_legend_handles_labels()
rate_ax.legend(handles_left + handles_right, labels_left + labels_right,
               loc='upper left')
plt.tight_layout()
plt.show()
# Small-multiples view of every series to eyeball trends.
df_cleaned_and_combined.plot(subplots=True, figsize=(10, 15))
plt.tight_layout()
plt.xlabel("Date")
plt.show()

# Pairwise correlations across all series, rounded for readability.
correlation_matrix = df_cleaned_and_combined.corr().round(2)
print(correlation_matrix)

# Heatmap of the same correlation matrix.
plt.figure(figsize=(12, 10))
sns.heatmap(correlation_matrix, annot=True, cmap="coolwarm", fmt=".2f", linewidths=.5)
plt.title("Correlation Heatmap")
plt.show()

# Money supply on its own, since its scale dwarfs the other series.
df_cleaned_and_combined['Money Supply'].plot(subplots=True, figsize=(8, 5))
plt.tight_layout()
plt.xlabel("Date")
plt.show()
import statsmodels.api as sm
from statsmodels.stats.outliers_influence import variance_inflation_factor

# OLS of gold price on the full macro feature set.
Y = df_cleaned_and_combined['Gold Price']
X = df_cleaned_and_combined[['S&P 500', 'CPI', 'Money Supply', 'Interest Rate %',
                             'World GDP', 'Global (simple average)',
                             'Global (GDP weighted average)', 'Advanced economies',
                             'Emerging economies', 'Low-income economies']]
# Include an intercept term.
X = sm.add_constant(X)
model = sm.OLS(Y, X).fit()
print(model.summary())

# Variance inflation factors flag multicollinearity among the regressors.
vif_data = pd.DataFrame({
    "Variable": X.columns,
    "VIF": [variance_inflation_factor(X.values, i) for i in range(X.shape[1])],
})
print(vif_data)
There is strong multicollinearity: any VIF value higher than 10 indicates multicollinearity.
World GDP and S&P 500 are dropped as they are strongly correlated with the money supply. Global (GDP weighted average) will remain as the only measure of uncertainty. Feel free to experiment with the other uncertainty measures if needed.
# Refit with the reduced regressor set: S&P 500 and World GDP are removed
# (they track the money supply), and only the GDP-weighted uncertainty
# measure is retained.
Y = df_cleaned_and_combined['Gold Price']
X = df_cleaned_and_combined[['CPI', 'Money Supply', 'Interest Rate %',
                             'Global (GDP weighted average)']]
# Include an intercept term.
X = sm.add_constant(X)
model = sm.OLS(Y, X).fit()
print(model.summary())

# Re-check VIFs on the reduced model.
vif_data = pd.DataFrame()
vif_data["Variable"] = X.columns
vif_data["VIF"] = [variance_inflation_factor(X.values, i)
                   for i in range(X.shape[1])]
print(vif_data)
# Test the fitted model for heteroscedasticity with the Breusch-Pagan test.
from statsmodels.stats.diagnostic import het_breuschpagan

residuals = model.resid
exog = model.model.exog
bp_stats = het_breuschpagan(residuals, exog)
stat_names = ['LM Statistic', 'LM-Test p-value', 'F-Statistic', 'F-Test p-value']
print(dict(zip(stat_names, bp_stats)))
There is no heteroscedasticity: all p-values are above 0.05.
Testing across different interest rate periods.
# Compare regimes: refit the reduced model separately on the high- and
# low-interest-rate months.
columns_to_drop = ['S&P 500', 'World GDP', 'Advanced economies',
                   'Emerging economies', 'Low-income economies',
                   'Global (simple average)']
df_new = df_cleaned_and_combined.drop(columns=columns_to_drop)

# Regime cut-off in percent (adjustable).
threshold = 1.5
df_high_interest = df_new[df_new['Interest Rate %'] > threshold]
df_low_interest = df_new[df_new['Interest Rate %'] <= threshold]

# High-rate regime.
X_high = sm.add_constant(df_high_interest.drop('Gold Price', axis=1))
model_high = sm.OLS(df_high_interest['Gold Price'], X_high).fit()
print("High Interest Rate Period Regression Results:")
print(model_high.summary())

# Low-rate regime.
X_low = sm.add_constant(df_low_interest.drop('Gold Price', axis=1))
model_low = sm.OLS(df_low_interest['Gold Price'], X_low).fit()
print("\nLow Interest Rate Period Regression Results:")
print(model_low.summary())