# Import necessary libraries
import warnings
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
import seaborn as sns
# Silence every warning category for the remainder of the session.
# NOTE: this also hides deprecation notices raised by pandas/seaborn.
warnings.simplefilter('ignore')
# Apply the Seaborn theme shared by all figures: white grid background, serif font
sns.set_theme(font='serif', style='whitegrid')
# Custom formatter function
def format_yaxis(ax):
    """Show the y-axis major tick labels as integers with thousands separators.

    Args:
        ax: Matplotlib axes whose y-axis major formatter is replaced.

    Tick values are truncated with ``int`` before formatting, so a tick at
    1500.7 is rendered as ``1,500``.
    """
    # One formatter is enough: set_major_formatter replaces whatever formatter
    # was installed before, so only the last one set has any effect.
    ax.yaxis.set_major_formatter(
        mticker.FuncFormatter(lambda value, _pos: f'{int(value):,}')
    )
# Path of the raw weekly hospital respiratory CSV
raw_csv_path = "/kaggle/input/weekly-hospital-respiratory-data-and-metrics/raw_weekly_hospital_respiratory_data_2020_2024.csv"
data = pd.read_csv(raw_csv_path)
# Peek at the first few rows
data.head()
# Trim leading/trailing whitespace from every column label
data.columns = [label.strip() for label in data.columns]
# Summary statistics, one row per numeric column
data.describe().T
# Data type of each column
data.dtypes.T
# (rows, columns) of the loaded table
data.shape
# Line chart: share of inpatient beds occupied by COVID-19 patients over time.
# Make sure 'Week Ending Date' is a datetime column (a no-op if it already is)
data['Week Ending Date'] = pd.to_datetime(data['Week Ending Date'])
# Rescale the stored fraction to a percentage (0.25 -> 25).
# NOTE: this updates `data` in place, so running this section twice rescales twice.
data['Percent Inpatient Beds Occupied by COVID-19 Patients'] *= 100
# Set the figure size
plt.figure(figsize=(15, 6))
# NOTE(review): when several rows share a date, seaborn aggregates them
# (mean with a confidence band by default) - confirm that is intended.
sns.lineplot(x='Week Ending Date', y='Percent Inpatient Beds Occupied by COVID-19 Patients', data=data, marker='o', color='red')
# Set the title, x-axis label, and y-axis label
plt.title('Percentage of Inpatient Beds Occupied by COVID-19 Patients')
plt.xlabel('Week Ending Date')
plt.ylabel('Percentage Occupied')
# One x-axis tick per calendar year, placed at the earliest week recorded for
# that year. Grouping by year does not depend on the row order of `data`, and
# rows with a missing date are left out instead of raising an IndexError.
week_dates = data['Week Ending Date']
year_starts = week_dates.groupby(week_dates.dt.year).min()
unique_years = year_starts.index.astype(int).to_numpy()
tick_positions = year_starts.tolist()
tick_labels = unique_years
plt.xticks(ticks=tick_positions, labels=tick_labels)
# Dashed, semi-transparent grid lines (this single call also switches the grid on)
plt.grid(True, linestyle='--', alpha=0.7)
# Display the plot
plt.tight_layout()
plt.show()
# Line chart: ICU bed occupancy share for COVID-19, Influenza and RSV over time.
# Make sure 'Week Ending Date' is a datetime column (a no-op if it already is)
data['Week Ending Date'] = pd.to_datetime(data['Week Ending Date'])
# Legend label -> (source column, line colour) for each plotted disease
icu_series = {
    'COVID-19': ('Percent ICU Beds Occupied by COVID-19 Patients', 'blue'),
    'Influenza': ('Percent ICU Beds Occupied by Influenza Patients', 'orange'),
    'RSV': ('Percent ICU Beds Occupied by RSV Patients', 'green'),
}
# Rescale the stored fractions to percentages (0.25 -> 25).
# NOTE: this updates `data` in place, so running this section twice rescales twice.
for column, _color in icu_series.values():
    data[column] *= 100
# Set the figure size
plt.figure(figsize=(15, 6))
# Draw one line per disease on the same axes
for disease, (column, color) in icu_series.items():
    sns.lineplot(x='Week Ending Date', y=column, data=data, marker='o', label=disease, color=color)
# Set the title, x-axis label, and y-axis label
plt.title('Comparison of ICU Bed Occupancy by Disease (COVID-19, Influenza, RSV)')
plt.xlabel('Week Ending Date')
plt.ylabel('Percentage Occupied')
# One x-axis tick per calendar year, placed at the earliest week recorded for
# that year. Grouping by year does not depend on the row order of `data`, and
# rows with a missing date are left out instead of raising an IndexError.
week_dates = data['Week Ending Date']
year_starts = week_dates.groupby(week_dates.dt.year).min()
unique_years = year_starts.index.astype(int).to_numpy()
tick_positions = year_starts.tolist()
tick_labels = unique_years
plt.xticks(ticks=tick_positions, labels=tick_labels)
# Dashed, semi-transparent grid lines (this single call also switches the grid on)
plt.grid(True, linestyle='--', alpha=0.7)
# Display the plot
plt.tight_layout()
plt.legend()
plt.show()
# Line chart: total patients hospitalized with COVID-19 over time.
# Make sure 'Week Ending Date' is a datetime column (a no-op if it already is)
data['Week Ending Date'] = pd.to_datetime(data['Week Ending Date'])
# Set the figure size
plt.figure(figsize=(15, 6))
# Use Seaborn to plot the data
sns.lineplot(x='Week Ending Date', y='Total Patients Hospitalized with COVID-19', data=data, marker='o', color='blue')
# Set the title, x-axis label, and y-axis label
plt.title('Total Patients Hospitalized with COVID-19 Over Time')
plt.xlabel('Week Ending Date')
plt.ylabel('Number of Patients')
# One x-axis tick per calendar year, placed at the earliest week recorded for
# that year. Grouping by year does not depend on the row order of `data`, and
# rows with a missing date are left out instead of raising an IndexError.
week_dates = data['Week Ending Date']
year_starts = week_dates.groupby(week_dates.dt.year).min()
unique_years = year_starts.index.astype(int).to_numpy()
tick_positions = year_starts.tolist()
tick_labels = unique_years
plt.xticks(ticks=tick_positions, labels=tick_labels)
# Dashed, semi-transparent grid lines (this single call also switches the grid on)
plt.grid(True, linestyle='--', alpha=0.7)
# Display the plot
plt.tight_layout()
plt.show()
# Bar chart: pediatric COVID-19 admissions per age group, summed over all rows of `data`
ped_covid_admissions = pd.DataFrame({
    'Age Group': ['0-4 years', '5-17 years'],
    'Admissions': [
        data['Number of Pediatric COVID-19 Admissions, 0-4 years'].sum(),
        data['Number of Pediatric COVID-19 Admissions, 5-17 years'].sum(),
    ],
})
# Open a new figure and draw the bars on its axes
plt.figure(figsize=(10, 6))
ax = sns.barplot(data=ped_covid_admissions, x='Age Group', y='Admissions', palette='viridis')
# Title and axis labels
ax.set_title('Distribution of Pediatric COVID-19 Admissions by Age Group', fontsize=16)
ax.set_xlabel('Age Group', fontsize=14)
ax.set_ylabel('Number of Admissions', fontsize=14)
# Write each bar's value just above its top edge, centred horizontally
for bar in ax.patches:
    bar_height = bar.get_height()
    bar_centre = bar.get_x() + bar.get_width() / 2.
    ax.annotate(
        f'{int(bar_height):,}',
        (bar_centre, bar_height),
        ha='center', va='center', fontsize=12, color='black',
        xytext=(0, 5), textcoords='offset points',
    )
# Dashed, semi-transparent grid lines
ax.grid(True, linestyle='--', alpha=0.7)
# Thousands separators on the y-axis tick labels
format_yaxis(ax)
# Display the plot
plt.tight_layout()
plt.show()
# Bar chart: ICU patient counts per disease, summed over all rows of `data`
icu_patients = pd.DataFrame({
    'Disease': ['COVID-19', 'Influenza', 'RSV'],
    'Total ICU Patients': [
        data['Total ICU Patients Hospitalized with COVID-19'].sum(),
        data['Total ICU Patients Hospitalized with Influenza'].sum(),
        data['Total ICU Patients Hospitalized with RSV'].sum(),
    ],
})
# Open a new figure and draw the bars on its axes
plt.figure(figsize=(10, 6))
ax = sns.barplot(data=icu_patients, x='Disease', y='Total ICU Patients', palette='Set2')
# Title and axis labels
ax.set_title('Comparison of Total ICU Patients Hospitalized with COVID-19, Influenza, and RSV', fontsize=16)
ax.set_xlabel('Disease', fontsize=14)
ax.set_ylabel('Total ICU Patients', fontsize=14)
# Write each bar's value just above its top edge, centred horizontally
for bar in ax.patches:
    bar_height = bar.get_height()
    bar_centre = bar.get_x() + bar.get_width() / 2.
    ax.annotate(
        f'{int(bar_height):,}',
        (bar_centre, bar_height),
        ha='center', va='center', fontsize=12, color='black',
        xytext=(0, 5), textcoords='offset points',
    )
# Dashed, semi-transparent grid lines
ax.grid(True, linestyle='--', alpha=0.7)
# Thousands separators on the y-axis tick labels
format_yaxis(ax)
# Display the plot
plt.tight_layout()
plt.show()
# Pie chart: adult vs. pediatric share of all COVID-19 admissions in `data`
total_adult_covid_admissions = data['Total Adult COVID-19 Admissions'].sum()
total_ped_covid_admissions = data['Total Pediatric COVID-19 Admissions'].sum()
total_covid_admissions = total_adult_covid_admissions + total_ped_covid_admissions
# Each group's share of the combined total, expressed as a percentage
percent_adult, percent_ped = (
    (group_total / total_covid_admissions) * 100
    for group_total in (total_adult_covid_admissions, total_ped_covid_admissions)
)
# Wedge labels, sizes and colours, in matching order
labels = ['Adult', 'Pediatric']
sizes = [percent_adult, percent_ped]
colors = ['lightcoral', 'lightskyblue']
plt.figure(figsize=(8, 8))
plt.pie(
    sizes,
    labels=labels,
    colors=colors,
    autopct='%1.1f%%',
    startangle=140,
    textprops={'fontsize': 14},
)
plt.title('Percentage of Adult and Pediatric COVID-19 Admissions', fontsize=16)
# Equal aspect ratio keeps the pie circular
plt.axis('equal')
# Legend listing the two groups
plt.legend(labels, loc="best")
# Display the plot
plt.show()
# Bar chart: 'Total Patients Hospitalized with COVID-19' summed per 'Geographic aggregation' value
geo_distribution = data.groupby('Geographic aggregation')['Total Patients Hospitalized with COVID-19'].sum().reset_index()
# Set the figure size (wide, since there is one bar per geographic value)
plt.figure(figsize=(20, 8))
# Use Seaborn to plot the data
ax = sns.barplot(x='Geographic aggregation', y='Total Patients Hospitalized with COVID-19', data=geo_distribution, palette='viridis')
# Set the title, x-axis label, and y-axis label
plt.title('Geographic Distribution of Total Patients Hospitalized with COVID-19', fontsize=16)
plt.xlabel('Geographic Aggregation', fontsize=14)
plt.ylabel('Total Patients Hospitalized', fontsize=14)
# Rotate x-axis labels for better readability
plt.xticks(rotation=90, ha='right', fontsize=12)
# Horizontal grid lines only
plt.grid(axis='y', linestyle='--', alpha=0.7)
# Label each bar with its value, written vertically above the bar. The value
# uses thousands separators so it matches the y-axis tick labels.
for bar in ax.patches:
    bar_height = bar.get_height()
    ax.annotate(
        f'{int(bar_height):,}',
        (bar.get_x() + bar.get_width() / 2., bar_height),
        ha='center', va='center', fontsize=12, color='black',
        xytext=(0, 25), textcoords='offset points', rotation=90,
    )
# Apply the custom y-axis formatter
format_yaxis(ax)
# Display the plot
plt.tight_layout()
plt.show()
# Line chart: number of ICU beds occupied over time.
# Make sure 'Week Ending Date' is a datetime column (a no-op if it already is)
data['Week Ending Date'] = pd.to_datetime(data['Week Ending Date'])
# Set the figure size
plt.figure(figsize=(15, 6))
# Use Seaborn to plot the data
sns.lineplot(x='Week Ending Date', y='Number of ICU Beds Occupied', data=data, marker='o', color='green', linewidth=2.5)
# Set the title, x-axis label, and y-axis label
plt.title('Trends in ICU Bed Occupancy Over Time', fontsize=16)
plt.xlabel('Week Ending Date', fontsize=14)
plt.ylabel('Number of ICU Beds Occupied', fontsize=14)
# One x-axis tick per calendar year, placed at the earliest week recorded for
# that year. Grouping by year does not depend on the row order of `data`, and
# rows with a missing date are left out instead of raising an IndexError.
week_dates = data['Week Ending Date']
year_starts = week_dates.groupby(week_dates.dt.year).min()
unique_years = year_starts.index.astype(int).to_numpy()
tick_positions = year_starts.tolist()
tick_labels = unique_years
plt.xticks(ticks=tick_positions, labels=tick_labels, rotation=45, fontsize=12)
# Dashed, semi-transparent grid lines
plt.grid(True, linestyle='--', alpha=0.7)
# Display the plot
plt.tight_layout()
plt.show()
# Bar chart: admission counts per disease, summed over all rows of `data`
total_admissions = pd.DataFrame({
    'Disease': ['COVID-19', 'Influenza', 'RSV'],
    'Total Admissions': [
        data['Total COVID-19 Admissions'].sum(),
        data['Total Influenza Admissions'].sum(),
        data['Total RSV Admissions'].sum(),
    ],
})
# Set the figure size
plt.figure(figsize=(10, 6))
# Use Seaborn to plot the data
ax = sns.barplot(x='Disease', y='Total Admissions', data=total_admissions, palette='viridis')
# Set the title, x-axis label, and y-axis label
plt.title('Comparison of Total Admissions for COVID-19, Influenza, and RSV', fontsize=16)
plt.xlabel('Disease', fontsize=14)
plt.ylabel('Total Admissions', fontsize=14)
# Horizontal grid lines only
plt.grid(axis='y', linestyle='--', alpha=0.7)
# Label each bar with its value just above the bar. The value uses thousands
# separators so it matches the y-axis tick labels.
for bar in ax.patches:
    bar_height = bar.get_height()
    ax.annotate(
        f'{int(bar_height):,}',
        (bar.get_x() + bar.get_width() / 2., bar_height),
        ha='center', va='center', fontsize=12, color='black',
        xytext=(0, 5), textcoords='offset points',
    )
# Apply the custom y-axis formatter
format_yaxis(ax)
# Display the plot
plt.tight_layout()
plt.show()