Sleep Duration vs. Happiness
Erin Weafer and Olivia Siracusa
Explore and Merge Datasets
import pandas as pd
Run to view results
# Load the three raw CSV inputs from /work:
#   happiness         - later supplies 'Overall rank', 'Country or region', 'Score'
#   timeuse           - later pivoted on 'Country' / 'Category' / 'Time (minutes)'
#   coordinates_codes - later supplies 'Country' and 'Latitude (average)'
happiness = pd.read_csv(filepath_or_buffer='/work/WorldHappiness2019.csv')
timeuse = pd.read_csv(filepath_or_buffer='/work/Time-Use-in-OECD-Countries-OECD.csv')
coordinates_codes = pd.read_csv(filepath_or_buffer='/work/countries_codes_and_coordinates.csv')
Run to view results
# Explore happiness dataframe: show the table loaded from WorldHappiness2019.csv
# so its columns and country spellings can be checked before merging
happiness
Run to view results
# Explore timeuse dataframe: show the raw time-use table before it is pivoted
# to one row per country
timeuse
Run to view results
# Explore coordinates dataframe: show the country codes / coordinates table
# that the average latitude is taken from
coordinates_codes
Run to view results
# timeuse2: reshape the time-use table to one row per country and one column
# per activity category; repeated Country/Category pairs are averaged, which
# is pivot_table's default aggregation
timeuse2 = pd.pivot_table(
    timeuse,
    values='Time (minutes)',
    index='Country',
    columns='Category',
)
# time_sleep: keep only the Sleep category and turn the Country index back
# into an ordinary column, giving a two-column frame (Country, Sleep)
time_sleep = timeuse2.loc[:, 'Sleep'].reset_index()
Run to view results
# Ensure dataframe is what we want: time_sleep should have one row per
# country with the columns 'Country' and 'Sleep'
time_sleep
Run to view results
# time_sleep: clean and replace mismatched country names so the later merge
# on happiness['Country or region'] can match them
# Trim leading/trailing whitespace first so the lookups below match exactly
time_sleep['Country'] = time_sleep['Country'].str.strip()
# Only names that actually change are listed; any country not in this mapping
# is left untouched by replace(). The targets are presumably the spellings
# used in the happiness data - verify against happiness['Country or region'].
fix_names = {
    "Korea": "South Korea",
    "UK": "United Kingdom",
    "USA": "United States",
}
time_sleep['Country'] = time_sleep['Country'].replace(fix_names)
Run to view results
# Ensure dataframe is clean: the 'Country' column should now show the
# replaced names (e.g. "South Korea", "United Kingdom", "United States")
time_sleep
Run to view results
# latitudes: select needed columns and index the table by country name so it
# can be joined against with right_index=True
latitudes = coordinates_codes[['Country', 'Latitude (average)']].set_index('Country')
# latitudes: the raw latitude values are strings (the .str accessor is needed);
# remove literal double quotes and surrounding whitespace, then convert to
# numeric. regex=False makes the quote a plain-text match on every pandas version.
latitudes['Latitude (average)'] = pd.to_numeric(
    latitudes['Latitude (average)']
    .str.replace('"', '', regex=False)
    .str.strip())
# latitudes: rename for easier handling. 'Country' is the index here, not a
# column, so only the latitude column needs a mapping.
latitudes = latitudes.rename(columns={'Latitude (average)': 'Latitude avg'})
Run to view results
# Ensure dataframe is what we want: latitudes should be indexed by 'Country'
# with a single numeric column 'Latitude avg'
latitudes
Run to view results
# Merge datasets, one step at a time
# 1) Start from the happiness columns needed for the analysis
merged = happiness[['Overall rank','Country or region', 'Score']]
# 2) Inner join: keep only countries that also appear in the sleep data
merged = merged.merge(
    time_sleep,
    how = 'inner',
    left_on = 'Country or region',
    right_on = 'Country',
)
# 3) Left join on the latitude index: every remaining country is kept, with
#    a missing 'Latitude avg' where no matching country name exists
merged = merged.merge(
    latitudes,
    how = 'left',
    left_on = 'Country',
    right_index = True,
)
# 4) Drop the duplicate 'Country' key and fix the final column order
merged = merged.loc[:, ['Overall rank','Country or region','Score','Sleep','Latitude avg']]
Run to view results
# Ensure dataframe merged correctly: expect the columns 'Overall rank',
# 'Country or region', 'Score', 'Sleep' and 'Latitude avg'
merged
Run to view results
# Export merged dataset to the same absolute /work location that the
# visualization section reads it back from, so the round trip does not depend
# on the current working directory. The row index is written as an unnamed
# first column, which the re-import step drops as 'Unnamed: 0'.
merged.to_csv('/work/final_merged_dataset.csv')
Run to view results
Data Visualization and Analysis
import seaborn as sns
import matplotlib.pyplot as plt
import statsmodels.formula.api as smf
Run to view results
# Import merged dataset from the exported CSV
merged = pd.read_csv('/work/final_merged_dataset.csv')
# The export wrote the row index as an unnamed first column, which read_csv
# labels 'Unnamed: 0'; it carries no information, so remove it
merged = merged.drop(columns = ['Unnamed: 0'])
merged
Run to view results
# Scatterplot: Sleep vs Happiness (Score)
plt.figure(figsize = (8, 6))
# scatterplot draws on the current figure and returns its Axes, which is then
# used to set the title, axis labels and grid
ax = sns.scatterplot(x = 'Sleep', y = 'Score', data = merged)
ax.set_title('Sleep (minutes/day) vs Happiness Score')
ax.set_xlabel('Sleep (minutes per day)')
ax.set_ylabel('Happiness Score')
ax.grid(True)
plt.show()
Run to view results
# Scatterplot: Latitude vs Happiness (Score)
plt.figure(figsize = (8, 6))
# scatterplot draws on the current figure and returns its Axes, which is then
# used to set the title, axis labels and grid
ax = sns.scatterplot(x = 'Latitude avg', y = 'Score', data = merged)
ax.set_title('Latitude vs Happiness Score')
ax.set_xlabel('Latitude (average)')
ax.set_ylabel('Happiness Score')
ax.grid(True)
plt.show()
Run to view results
# Correlation Table: pairwise Pearson correlations (the DataFrame.corr default)
# between happiness score, sleep minutes and average latitude
corr_matrix = merged.loc[:, ['Score','Sleep','Latitude avg']].corr(method = 'pearson')
corr_matrix
Run to view results
# Fit the MLR model: Score regressed on Sleep and average latitude.
# Q('...') quotes the column name in the formula because 'Latitude avg'
# contains a space.
model = smf.ols(formula = "Score ~ Sleep + Q('Latitude avg')", data = merged)
model = model.fit()
model.summary()
Run to view results