Hypothesis formulation
Testing the relationship between luxury and retail goods stocks and the inflation rate
!pip install yfinance
import yfinance as yf
import pandas as pd
import numpy as np
# Define ETF symbols
# Two ETFs per theme so each portfolio is not hostage to a single fund.
#Luxury portfolio
#Amundi Index Solutions - Amundi S&P Global Luxury
luxury_etf_symbol1 = 'GLUX.DE'
# Global X Millennials Consumer ETF
luxury_etf_symbol2= 'MILN'
#Retail portfolio
#VanEck Retail ETF
retail_etf_symbol1 = 'RTH'
#SPDR S&P Retail ETF
retail_etf_symbol2 = 'XRT'
# Fetch ETF data from Yahoo Finance (network I/O — requires connectivity)
luxury_etf1 = yf.Ticker(luxury_etf_symbol1)
luxury_etf2 = yf.Ticker(luxury_etf_symbol2)
retail_etf1 = yf.Ticker(retail_etf_symbol1)
retail_etf2 = yf.Ticker(retail_etf_symbol2)
# Download historical data: a five-year daily sample, Nov-2018 .. Oct-2023.
# history() returns an OHLCV DataFrame indexed by (timezone-aware) date;
# NOTE(review): GLUX.DE trades on Xetra, so its calendar/timezone differs
# from the three US-listed funds — rows are later aligned by calendar date.
start_date = '2018-11-01'
end_date = '2023-11-01'
luxury_data1 = luxury_etf1.history(start=start_date, end=end_date)
luxury_data2 = luxury_etf2.history(start=start_date, end=end_date)
retail_data1 = retail_etf1.history(start=start_date, end=end_date)
retail_data2 = retail_etf2.history(start=start_date, end=end_date)
# Inspect the available columns
print(luxury_data1.columns)
print(retail_data1.columns)

# Cleaning: reduce the tz-aware timestamps to plain dates and drop the
# price-history columns this analysis never uses.  The original repeated
# the same four-line recipe once per DataFrame; a helper removes the
# duplication.  `errors='ignore'` guards against tickers whose history
# lacks one of the columns (yfinance only emits 'Capital Gains' for some
# funds) — the original drop() would raise KeyError in that case.
_UNUSED_COLUMNS = ['High', 'Low', 'Stock Splits', 'Capital Gains', 'Dividends']

def _clean_history(df):
    """Return *df* with a plain date index and unused columns removed."""
    df.index = df.index.date
    return df.drop(columns=_UNUSED_COLUMNS, errors='ignore')

luxury_data1 = _clean_history(luxury_data1)
print(luxury_data1)
luxury_data2 = _clean_history(luxury_data2)
print(luxury_data2)
retail_data1 = _clean_history(retail_data1)
print(retail_data1)
retail_data2 = _clean_history(retail_data2)
print(retail_data2)
# Collect one closing-price series per ETF into a single DataFrame keyed
# by ticker symbol.  (yfinance's history() returns 'Close' already
# adjusted for splits and dividends, so no separate 'Adj Close' exists.)
close_by_symbol = {
    luxury_etf_symbol1: luxury_data1['Close'],
    luxury_etf_symbol2: luxury_data2['Close'],
    retail_etf_symbol1: retail_data1['Close'],
    retail_etf_symbol2: retail_data2['Close'],
}
etf_data = pd.DataFrame(close_by_symbol)

# Simple daily returns; the first row (no prior price) is NaN and dropped,
# as is any date on which one of the four ETFs did not trade.
etf_returns = etf_data.pct_change().dropna()
print(etf_returns)
# Per-ETF summary statistics of the daily returns, each followed by a
# separator rule.  Labels are kept exactly as before.
summary_stats = [
    ("mean", etf_returns.mean()),
    ("Std. dev", etf_returns.std()),
    ("skew", etf_returns.skew()),
    ("kurt", etf_returns.kurtosis()),
]
for label, values in summary_stats:
    print(label + " : ", values)
    print("*" * 40)
# Equal weighting (50/50) within each two-ETF portfolio.
luxury_weight = 0.5
retail_weight = 0.5

# Daily portfolio return = weighted sum of the constituent ETF returns.
# The per-term multiplication order matches the original exactly so the
# floating-point results are bit-identical.
portfolio_returns_luxury = (
    etf_returns[luxury_etf_symbol1] * luxury_weight
    + etf_returns[luxury_etf_symbol2] * luxury_weight
)
portfolio_returns_retail = (
    etf_returns[retail_etf_symbol1] * retail_weight
    + etf_returns[retail_etf_symbol2] * retail_weight
)
print("Daily Luxury Portfolio return: ", portfolio_returns_luxury)
print("Daily Retail Portfolio return: ", portfolio_returns_retail)

# Growth of one unit of currency: running product of (1 + daily return).
cumulative_returns_luxury = (1 + portfolio_returns_luxury).cumprod()
cumulative_returns_retail = (1 + portfolio_returns_retail).cumprod()
print("Cumulative Luxury Portfolio return: ", cumulative_returns_luxury)
print("Cumulative Retail Portfolio return: ", cumulative_returns_retail)
print(portfolio_returns_luxury)
print(portfolio_returns_retail)
# Plot both portfolios' cumulative-return paths on one axis.
import matplotlib.pyplot as plt

for series, label, colour in (
    (cumulative_returns_luxury, 'Luxury Portfolio', 'blue'),
    (cumulative_returns_retail, 'Retail Portfolio', 'orange'),
):
    series.plot(label=label, color=colour)
plt.xlabel('Date')
plt.ylabel('Cumulative Returns')
plt.title('Portfolio Cumulative Returns')
plt.legend()
plt.show()
import numpy as np

# Risk-free rate (10-year Treasury yield) was 4.566% as of 8 Nov 2023
risk_free_rate = 0.04566
TRADING_DAYS = 252  # conventional number of US trading days per year

# Drop missing observations BEFORE counting and compounding (the original
# counted the sample length first and cleaned afterwards).
portfolio_returns_luxury = portfolio_returns_luxury.dropna()
portfolio_returns_retail = portfolio_returns_retail.dropna()

# BUG FIX: the original used `portfolio_returns[-1]` — i.e. the LAST DAILY
# RETURN — as if it were the cumulative return (also clobbering the
# cumulative-return Series with a scalar), and then raised it to 1/n,
# which is a per-day, not per-year, exponent.  Correct recipe:
#   total growth = prod(1 + r_t);  annualized = growth ** (252/n) - 1.
n1 = len(portfolio_returns_luxury)  # number of trading days in the sample
total_growth_luxury = (1 + portfolio_returns_luxury).prod()
annualized_return_luxury = total_growth_luxury ** (TRADING_DAYS / n1) - 1

n2 = len(portfolio_returns_retail)
total_growth_retail = (1 + portfolio_returns_retail).prod()
annualized_return_retail = total_growth_retail ** (TRADING_DAYS / n2) - 1

# Annualized volatility: daily standard deviation scaled by sqrt(252)
annualized_volatility_luxury = portfolio_returns_luxury.std() * np.sqrt(TRADING_DAYS)
annualized_volatility_retail = portfolio_returns_retail.std() * np.sqrt(TRADING_DAYS)

# Sharpe ratio: annualized excess return over annualized volatility
excess_return_luxury = annualized_return_luxury - risk_free_rate
excess_return_retail = annualized_return_retail - risk_free_rate
sharpe_ratio_luxury = excess_return_luxury / annualized_volatility_luxury
sharpe_ratio_retail = excess_return_retail / annualized_volatility_retail

print(f"Portfolio Luxury Annualized Return: {annualized_return_luxury:.2%}")
print(f"Portfolio Annualized Luxury Volatility: {annualized_volatility_luxury:.2%}")
print(f"Sharpe Ratio Luxury: {sharpe_ratio_luxury:.2f}")
print(f"Portfolio Retail Annualized Return: {annualized_return_retail:.2%}")
print(f"Portfolio Annualized Retail Volatility: {annualized_volatility_retail:.2%}")
print(f"Sharpe Ratio Retail: {sharpe_ratio_retail:.2f}")
Maximum Sharpe portfolio
!pip install pyfolio
import pyfolio as pf
Minimum volatility portfolio
Economic Indicators
World inflation rate data
#input the global inflation rate--using the headline consumer price index
!pip install --upgrade pip
import pandas as pd
!pip install openpyxl
inflation_rate=pd.read_excel('Inflation-Rate.xlsx')
# Calculate the world's monthly inflation rate by summing up all country's inflation rates
inflation_rate.dropna()
selected_columns = inflation_rate.iloc[:, 591:-4]
# Calculate the average for each time period
# Replace non-numeric values (e.g., string values) with NaN
selected_columns = selected_columns.apply(pd.to_numeric, errors='coerce')
selected_columns=pd.DataFrame(selected_columns)
average_inflation = selected_columns.mean(axis=0)
# Assuming average_inflation is your Series
average_inflation_df = average_inflation.reset_index()
# Rename the columns for better clarity
average_inflation_df.columns = ['Datetime', 'Average Inflation Rate']
average_inflation_df['Datetime'] = pd.to_datetime(average_inflation_df['Datetime'], format='%Y%m')
average_inflation_df.index=average_inflation_df['Datetime']
# Display the updated DataFrame
print(average_inflation_df)
World Unemployment rate data
#Downloading unemployment rate
# BUG FIX: the original read 'unemployment.xlsx' twice and discarded the
# first result; one read with the country column as index suffices.
unemployment_data = pd.read_excel('unemployment.xlsx', index_col=0)
# Dropping countries with incomplete series
unemployment_data = unemployment_data.dropna()
# Converting the column headers (month labels) to datetime objects;
# unparseable headers become NaT
unemployment_data.columns = pd.to_datetime(unemployment_data.columns, errors='coerce')
# Keep Nov-2018 through Sep-2023 inclusive.  (The original comment said
# "May-2011", which did not match the code — the code's range is kept.)
start_month = pd.to_datetime('2018-11-01')
end_month = pd.to_datetime('2023-09-01')
# Filtering columns based on the date range
selected_columns2 = unemployment_data.columns[(unemployment_data.columns >= start_month) & (unemployment_data.columns <= end_month)]
selected_data = unemployment_data[selected_columns2]
# Coerce any stray strings to NaN so mean() skips them
selected_data = selected_data.apply(pd.to_numeric, errors='coerce')
# Cross-country average unemployment rate for each month
average_unemployment_per_month = selected_data.mean()
average_unemployment_df = pd.DataFrame(average_unemployment_per_month)
average_unemployment_df.columns = ['Average Unemployment Rate']
# The later concat/cleanup step expects an explicit 'Datetime' column
average_unemployment_df['Datetime'] = average_unemployment_df.index
# Displaying the new DataFrame
print(average_unemployment_df)
World interest rate data
Using monthly long-term interest rates of Australia, Canada, Switzerland, the Eurozone (19 countries), the U.K., Japan, Norway, New Zealand, Sweden, and the U.S. to find the average global monthly interest rate
#Import csv file with monthly long-term interest rate data
interest_rate_data = pd.read_csv('global interest rate.csv')
df = interest_rate_data

#Data cleaning process
#Drop columns not needed for the pivot
df = df.drop(columns=['INDICATOR', 'SUBJECT', 'MEASURE', 'FREQUENCY', 'Flag Codes'])
#Convert column 'TIME' (YYYY-MM labels) to datetime
df['TIME'] = pd.to_datetime(df['TIME'], format='%Y-%m')
df.set_index('TIME', inplace=True)

#Pivot to one column per country, one row per month
df = df.rename(columns={'LOCATION': 'Country'})
df_pivoted = df.pivot_table(index=df.index, columns='Country', values='Value')
#Dropping countries with incomplete data
df_cleaned = df_pivoted.dropna(axis=1)
#Dropping individual EA19 members and other non-target countries
df_final = df_cleaned.drop(columns=['BRA', 'BEL', 'DEU', 'ESP', 'FRA', 'HUN', 'ISR', 'PRT', 'ZAF'])

df_final.index = pd.to_datetime(df_final.index)
# Keep only rows OUTSIDE Feb-2000 .. Oct-2018, i.e. the Nov-2018-onwards
# sample (plus anything before Feb-2000 that survived cleaning).
start_date = '2000-02-01'
end_date = '2018-10-01'
df_final = df_final[(df_final.index < start_date) | (df_final.index > end_date)]

#Add column with the cross-country monthly average rate
df_final['Average'] = df_final.mean(axis=1)
print("Cleaned data of interest rates globally:")
print(df_final)

#Visualise: one unlabeled line per country, the average highlighted.
# FIX: the original plotted 'Average' twice (once inside the loop, once
# after it) and never called plt.legend(), so its label was never shown.
import matplotlib.pyplot as plt
for country in df_final.columns:
    if country == 'Average':
        continue
    plt.plot(df_final.index, df_final[country])
plt.plot(df_final.index, df_final['Average'], label='Average')
plt.legend()
plt.xlabel('Time')
plt.ylabel('Monthly long-term interest rates (%)')
plt.title('Global monthly long-term interest rates')
plt.show()
CCI (Consumer Confidence Index)
#Import csv file with monthly CCI (Consumer Confidence Index) data
cci_data = pd.read_csv('CCI.csv')
df_cci = cci_data

#Data cleaning process
#Drop columns not needed for the pivot
df_cci = df_cci.drop(columns=['INDICATOR', 'SUBJECT', 'MEASURE', 'FREQUENCY', 'Flag Codes'])
#Convert column 'TIME' (YYYY-MM labels) to datetime
df_cci['TIME'] = pd.to_datetime(df_cci['TIME'], format='%Y-%m')
df_cci.set_index('TIME', inplace=True)

# BUG FIX: the original did `df_cci = df.rename(...)` and
# `pivot_table(index=df.index, ...)` — i.e. it rebuilt df_cci from the
# INTEREST-RATE DataFrame `df`, silently discarding the CCI.csv data just
# loaded.  Both now operate on df_cci.
df_cci = df_cci.rename(columns={'LOCATION': 'Country'})
df_cci = df_cci.pivot_table(index=df_cci.index, columns='Country', values='Value')
#Dropping countries with incomplete data
df_cci = df_cci.dropna(axis=1)
#Dropping individual EA19 members and other non-target countries.
# errors='ignore': after the bug fix the column set comes from CCI.csv,
# where some of these codes may already be absent.
df_cci = df_cci.drop(columns=['BRA', 'BEL', 'DEU', 'ESP', 'FRA', 'HUN', 'ISR', 'PRT', 'ZAF'], errors='ignore')

df_cci.index = pd.to_datetime(df_cci.index)
# Keep only rows OUTSIDE Feb-2000 .. Oct-2018 (the Nov-2018+ sample)
start_date = '2000-02-01'
end_date = '2018-10-01'
df_cci = df_cci[(df_cci.index < start_date) | (df_cci.index > end_date)]

#Add column with the cross-country monthly average CCI
df_cci['Average cci'] = df_cci.mean(axis=1)
print("Cleaned data of CCI globally:")
print(df_cci)

#Visualise: one unlabeled line per country, the average highlighted
import matplotlib.pyplot as plt
for country in df_cci.columns:
    if country == 'Average cci':
        continue
    plt.plot(df_cci.index, df_cci[country])
plt.plot(df_cci.index, df_cci['Average cci'], label='Average cci')
plt.legend()
plt.xlabel('Time')
plt.ylabel('Monthly CCI')
plt.title('Global monthly CCI')
plt.show()
CSI (Consumer Sentiment Index)
#Import csv file with monthly CSI (Consumer Sentiment Index) data
csi_data = pd.read_csv('CSI.csv')
df_csi = csi_data

#Data cleaning process
#Drop columns not needed for the pivot
df_csi = df_csi.drop(columns=['INDICATOR', 'SUBJECT', 'MEASURE', 'FREQUENCY', 'Flag Codes'])
#Convert column 'TIME' (YYYY-MM labels) to datetime
df_csi['TIME'] = pd.to_datetime(df_csi['TIME'], format='%Y-%m')
df_csi.set_index('TIME', inplace=True)

# BUG FIX: the original did `df_csi = df.rename(...)` and
# `pivot_table(index=df.index, ...)` — i.e. it rebuilt df_csi from the
# INTEREST-RATE DataFrame `df`, silently discarding the CSI.csv data just
# loaded.  Both now operate on df_csi.
df_csi = df_csi.rename(columns={'LOCATION': 'Country'})
df_csi = df_csi.pivot_table(index=df_csi.index, columns='Country', values='Value')
#Dropping countries with incomplete data
df_csi = df_csi.dropna(axis=1)
#Dropping individual EA19 members and other non-target countries.
# errors='ignore': after the bug fix the column set comes from CSI.csv,
# where some of these codes may already be absent.
df_csi = df_csi.drop(columns=['BRA', 'BEL', 'DEU', 'ESP', 'FRA', 'HUN', 'ISR', 'PRT', 'ZAF'], errors='ignore')

df_csi.index = pd.to_datetime(df_csi.index)
# Keep only rows OUTSIDE Feb-2000 .. Oct-2018 (the Nov-2018+ sample)
start_date = '2000-02-01'
end_date = '2018-10-01'
df_csi = df_csi[(df_csi.index < start_date) | (df_csi.index > end_date)]

#Add column with the cross-country monthly average CSI
df_csi['Average csi'] = df_csi.mean(axis=1)
print("Cleaned data of CSI globally:")
print(df_csi)

#Visualise: one unlabeled line per country, the average highlighted
import matplotlib.pyplot as plt
for country in df_csi.columns:
    if country == 'Average csi':
        continue
    plt.plot(df_csi.index, df_csi[country])
plt.plot(df_csi.index, df_csi['Average csi'], label='Average csi')
plt.legend()
plt.xlabel('Time')
plt.ylabel('Monthly CSI')
plt.title('Global monthly CSI')
plt.show()
US consumption
# US consumption spreadsheet: series in rows, month labels as column headers
us_consumption_data=pd.read_excel('US_consumption.xlsx', index_col=0)
# Dropping rows with NaN values
us_consumption_data = us_consumption_data.dropna()
# Converting the column headers to datetime objects; headers that fail to
# parse are coerced to NaT rather than raising
us_consumption_data.columns = pd.to_datetime(us_consumption_data.columns, errors='coerce')
us_consumption_data.index=pd.to_datetime(us_consumption_data.index)
# NOTE(review): this selects the column whose header failed datetime
# parsing and became NaT.  Indexing with the *string* 'NaT' only works if
# that header was stringified; verify against the spreadsheet and consider
# selecting the original header name explicitly — TODO confirm.
us_consumption_data['us consumption']=us_consumption_data['NaT']
print(us_consumption_data)
Plot time series of economic indicators
!pip install statsmodels
import statsmodels.api as sm
portfolio_returns_luxury=pd.DataFrame(portfolio_returns_luxury)
portfolio_returns_luxury.columns = ['portfolio returns luxury']
# Print the updated DataFrame
print(portfolio_returns_luxury)
# Print the updated DataFrame
portfolio_returns_luxury=pd.DataFrame(portfolio_returns_luxury)
portfolio_returns_luxury.columns = ['portfolio returns luxury']
# Print the updated DataFrame
print(portfolio_returns_luxury)
portfolio_returns_retail=pd.DataFrame(portfolio_returns_retail)
portfolio_returns_retail.columns = ['portfolio returns retail']
# Print the updated DataFrame
print(portfolio_returns_retail)
#get the average monthly return of the luxury portfolio
portfolio_returns_luxury.index = pd.to_datetime(portfolio_returns_luxury.index)
# (already a DataFrame from the previous cell; this re-wrap is a no-op)
portfolio_returns_luxury=pd.DataFrame(portfolio_returns_luxury)
# Snap every date to the END of its month (to_timestamp('M') is month-end,
# not the first day as the original comment claimed)
portfolio_returns_luxury.index = portfolio_returns_luxury.index.to_period('M').to_timestamp('M')
# Mean daily return within each calendar month
overall_luxury_average_monthly_return = portfolio_returns_luxury.resample('M').mean()
print(overall_luxury_average_monthly_return)
#get the average monthly return of the retail portfolio
portfolio_returns_retail.index = pd.to_datetime(portfolio_returns_retail.index)
# (already a DataFrame; no-op re-wrap kept for byte-identical behavior)
portfolio_returns_retail=pd.DataFrame(portfolio_returns_retail)
# Snap every date to the END of its month
portfolio_returns_retail.index = portfolio_returns_retail.index.to_period('M').to_timestamp('M')
# Mean daily return within each calendar month
overall_retail_average_monthly_return = portfolio_returns_retail.resample('M').mean()
print(overall_retail_average_monthly_return)
# Normalize the luxury monthly-return index from month-END stamps to the
# FIRST of each month, so it aligns with the inflation/unemployment frames
# at concat time.  The 'YearMonth' and 'Datetime' columns created here are
# also expected (and dropped) by the later cleanup step.
overall_luxury_average_monthly_return.index = pd.to_datetime(overall_luxury_average_monthly_return.index)
# Extract year and month from the month-end index
overall_luxury_average_monthly_return['Year'] = overall_luxury_average_monthly_return.index.year
overall_luxury_average_monthly_return['Month'] = overall_luxury_average_monthly_return.index.month
# Merge into a 'YYYY-M' label (month is NOT zero-padded; to_datetime's
# '%Y-%m' format below accepts that)
overall_luxury_average_monthly_return['YearMonth'] = overall_luxury_average_monthly_return['Year'].astype(str) + '-' + overall_luxury_average_monthly_return['Month'].astype(str)
# Drop the intermediate columns
overall_luxury_average_monthly_return = overall_luxury_average_monthly_return.drop(['Year', 'Month'], axis=1)
# Parse the label back to a first-of-month timestamp
overall_luxury_average_monthly_return['Datetime'] = pd.to_datetime(overall_luxury_average_monthly_return['YearMonth'], format='%Y-%m')
# Re-index on the normalized timestamps
overall_luxury_average_monthly_return.index=overall_luxury_average_monthly_return['Datetime']
print(overall_luxury_average_monthly_return)
# Same normalization for the retail monthly returns
overall_retail_average_monthly_return.index = pd.to_datetime(overall_retail_average_monthly_return.index)
# Extract year and month from the month-end index
overall_retail_average_monthly_return['Year'] = overall_retail_average_monthly_return.index.year
overall_retail_average_monthly_return['Month'] = overall_retail_average_monthly_return.index.month
# Merge into a 'YYYY-M' label
overall_retail_average_monthly_return['YearMonth'] = overall_retail_average_monthly_return['Year'].astype(str) + '-' + overall_retail_average_monthly_return['Month'].astype(str)
# Drop the intermediate columns
overall_retail_average_monthly_return = overall_retail_average_monthly_return.drop(['Year', 'Month'], axis=1)
# Parse the label back to a first-of-month timestamp
overall_retail_average_monthly_return['Datetime'] = pd.to_datetime(overall_retail_average_monthly_return['YearMonth'], format='%Y-%m')
# Re-index on the normalized timestamps
overall_retail_average_monthly_return.index=overall_retail_average_monthly_return['Datetime']
print(overall_retail_average_monthly_return)
# Outer-join all monthly series on their datetime indices; months missing
# from any source produce NaN rows, which dropna() below removes.
data=pd.concat([average_inflation_df, average_unemployment_df,overall_luxury_average_monthly_return, overall_retail_average_monthly_return, df_final,df_cci,df_csi,us_consumption_data], axis=1)
# Remove helper columns and the per-country interest-rate columns,
# keeping only the cross-country averages as regressors
columns_to_delete = ['Datetime', 'YearMonth', 'AUS', 'CAN','CHE', 'EA19', 'GBR', 'JPN', 'NOR','NZL','SWE','USA']
data = data.drop(columns_to_delete, axis=1)
# Log-transform the level regressors.
# NOTE(review): np.log yields NaN (with a warning) for any month whose
# average is <= 0, and dropna() then silently removes that whole row —
# confirm no deflationary months are being discarded unintentionally.
data['log Inflation Rate']=np.log(data['Average Inflation Rate'])
data['log cci']=np.log(data['Average cci'])
data['log csi']=np.log(data['Average csi'])
# Keep only months where every series is present
data.dropna(inplace=True)
print(data)
# Regression 1 (luxury): full specification — interest rate ('Average'),
# unemployment, log inflation, CSI, and US consumption as regressors.
X1 = sm.add_constant(data[['Average', 'Average Unemployment Rate','log Inflation Rate', 'Average csi','us consumption']])
y = data['portfolio returns luxury']
# Fit the OLS regression model
model = sm.OLS(y, X1).fit()
print(model.summary())
# Regression 2 (luxury): reduced specification — the comment said only
# "del average inflation rate", but CSI is dropped here as well.
X1 = sm.add_constant(data[['Average Unemployment Rate','Average','us consumption']])
y = data['portfolio returns luxury']
# Fit the reduced model
model = sm.OLS(y, X1).fit()
print(model.summary())
# Regression 3 (retail): interest rate, unemployment, US consumption.
X2 = sm.add_constant(data[['Average','Average Unemployment Rate','us consumption']])
y = data['portfolio returns retail']
# Fit the retail model
model = sm.OLS(y, X2).fit()
print(model.summary())
# Regression 4 (retail).
# NOTE(review): this uses exactly the same regressors as Regression 3
# (only the column order differs), so it re-fits an identical model and
# prints the same summary — likely a copy/paste remnant; confirm intent.
X2 = sm.add_constant(data[['Average Unemployment Rate', 'Average','us consumption']])
y = data['portfolio returns retail']
# Fit the duplicate retail model
model = sm.OLS(y, X2).fit()
print(model.summary())