import os

import numpy as np
import pandas as pd
# Derive per-capita, per-worker and per-effective-labor measures of capital
# and GDP. Assumes a DataFrame named 'pwt_data' already exists with the
# columns read below.


def _add_derived_measures(df):
    """Add the derived capital and GDP ratio columns to ``df`` in place.

    Reads the columns 'Capital_Stock', 'GDP_at_Constant_2017_National_Prices',
    'Population', 'Number_of_Persons_Engaged',
    'Average_Annual_Hours_Worked_per_Person_Engaged' and 'Human_Capital_Index',
    and writes eight new columns (four capital ratios, four GDP ratios).
    A missing input column raises ``KeyError``.
    """
    capital = df['Capital_Stock']
    gdp = df['GDP_at_Constant_2017_National_Prices']
    population = df['Population']
    workers = df['Number_of_Persons_Engaged']
    human_capital = df['Human_Capital_Index']

    # Total hours worked: persons engaged times average annual hours each.
    total_hours = workers * df['Average_Annual_Hours_Worked_per_Person_Engaged']
    # Effective labor: total hours scaled by the human capital index.
    # The workforce has to be part of this denominator; dividing an economy-wide
    # total by (human capital * hours per person) alone would not yield a
    # "per unit of labor" quantity.
    # NOTE(review): confirm hours * human capital is the intended definition
    # of effective labor for this analysis.
    effective_labor = total_hours * human_capital

    # Capital ratios
    df['Capital_per_Capita'] = capital / population
    df['Capital_per_Worker'] = capital / workers
    df['Capital_per_Effective_Worker'] = capital / total_hours
    df['Capital_per_Unit_of_Effective_Labor'] = capital / effective_labor

    # GDP ratios
    df['GDP_per_Capita'] = gdp / population
    df['GDP_per_Worker'] = gdp / workers
    df['GDP_per_Human_Capital'] = gdp / human_capital
    df['GDP_per_Unit_of_Effective_Labor'] = gdp / effective_labor


_add_derived_measures(pwt_data)

# Display the updated DataFrame
print(pwt_data)
import matplotlib.pyplot as plt
# Scatter plots of capital stock against GDP, one figure per year of interest.
# Assumes a DataFrame named 'pwt_data' with 'Year', 'Capital_Stock' and
# 'GDP_at_Constant_2017_National_Prices' columns.

# savefig does not create missing directories, so make sure the target exists.
os.makedirs('./graphs', exist_ok=True)

# Years of interest: fixed benchmark years plus the latest year in the data.
# dict.fromkeys drops the duplicate that appears when the latest year is
# already one of the benchmarks, while keeping the original order.
years_of_interest = list(
    dict.fromkeys([1950, 1970, 1990, 2010, pwt_data['Year'].max()])
)

# Loop through the years and create one scatter plot per year
for year in years_of_interest:
    # Filter data for the specific year
    data_for_year = pwt_data[pwt_data['Year'] == year]

    # Scatter plot
    fig = plt.figure(figsize=(10, 6))
    plt.scatter(
        data_for_year['GDP_at_Constant_2017_National_Prices'],
        data_for_year['Capital_Stock'],
        alpha=0.7,
    )
    plt.title(f'Relation between GDP and Capital ({year})')
    plt.xlabel('GDP at Constant 2017 National Prices')
    plt.ylabel('Capital Stock')
    plt.grid(True)

    # Save the plot
    file_path = f'./graphs/scatter_plot_gdp_vs_capital_{year}.png'
    plt.savefig(file_path)
    print(f'Plot saved as PNG: {file_path}')

    # Show the plot (optional)
    plt.show()
    # Release the figure so figures do not accumulate across iterations.
    plt.close(fig)
import matplotlib.pyplot as plt
import seaborn as sns
# Line plots of each measure over time: one figure per measure by income
# group and one per measure by region.
# Assumes a DataFrame named 'pwt_data' with 'Year', 'Income_Group' and
# 'Region' columns in addition to the measure columns.

# Display names of the measures; these appear in titles, axis labels and
# output file names.
measures = [
    'GDP', 'Capital_per_Capita', 'Capital_per_Worker',
    'Capital_per_Effective_Worker', 'Capital_per_Unit_of_Effective_Labor'
]
# Nothing in this file creates a column literally named 'GDP'; the GDP series
# used elsewhere is 'GDP_at_Constant_2017_National_Prices'. The alias is only
# used when the DataFrame has no column with the display name itself.
measure_columns = {'GDP': 'GDP_at_Constant_2017_National_Prices'}


def _plot_measure_by_group(df, measure, column, group_column, group_title, file_tag):
    """Plot the yearly mean of one measure with one line per group, then save it.

    Args:
        df: DataFrame holding 'Year', ``group_column`` and ``column``.
        measure: Display name used in the title, y-label and file name.
        column: Name of the DataFrame column that holds the values.
        group_column: Column whose distinct values define the lines.
        group_title: Human-readable group name used in the title.
        file_tag: Suffix identifying the grouping in the output file name.
    """
    fig = plt.figure(figsize=(12, 8))

    # Each group contains many rows per year. Plotting the raw rows would join
    # them into a single zigzag line, so average within (group, year) first.
    # groupby sorts its keys, so years come out in ascending order.
    # NOTE(review): the mean is used as the group aggregate; confirm that it
    # is the intended statistic (rather than e.g. the median or the sum).
    yearly_means = df.groupby([group_column, 'Year'])[column].mean()
    per_group = yearly_means.unstack(group_column)

    for group_name in per_group.columns:
        plt.plot(per_group.index, per_group[group_name], label=group_name)

    plt.title(f'Evolution of {measure} by {group_title}')
    plt.xlabel('Year')
    plt.ylabel(measure)
    plt.legend()
    plt.grid(True)

    # Save the plot
    file_path = f'./graphs/line_plot_{measure}_by_{file_tag}.png'
    plt.savefig(file_path)
    print(f'Plot saved as PNG: {file_path}')

    # Show the plot (optional)
    plt.show()
    # Release the figure so figures do not accumulate across calls.
    plt.close(fig)


# savefig does not create missing directories, so make sure the target exists.
os.makedirs('./graphs', exist_ok=True)

# The palette is stored in matplotlib's rcParams, so setting it once covers
# every figure created below.
sns.set_palette("husl")

# Loop through measures
for measure in measures:
    if measure in pwt_data.columns:
        column = measure
    else:
        column = measure_columns.get(measure, measure)

    # Separate figure for income groups
    _plot_measure_by_group(
        pwt_data, measure, column, 'Income_Group', 'Income Group', 'income_group'
    )
    # Separate figure for regions
    _plot_measure_by_group(
        pwt_data, measure, column, 'Region', 'Region', 'region'
    )
import seaborn as sns
import matplotlib.pyplot as plt
# KDE plots of the log variables: one figure per variable, one curve per
# selected year. Assumes a DataFrame named 'pwt_data' with a 'Year' column.

# savefig does not create missing directories, so make sure the target exists.
os.makedirs('./graphs', exist_ok=True)

# Select the variables for which you want to plot the KDEs
variables = ['Log_Income_Per_Capita', 'Log_Capital_Per_Worker']

# Nothing earlier in this file creates the log columns, so derive any that
# are missing from their level counterparts. Existing columns are left as-is.
# NOTE(review): this takes income per capita to be 'GDP_per_Capita'; confirm.
log_sources = {
    'Log_Income_Per_Capita': 'GDP_per_Capita',
    'Log_Capital_Per_Worker': 'Capital_per_Worker',
}
for variable in variables:
    if variable not in pwt_data.columns:
        levels = pwt_data[log_sources[variable]]
        # The log is undefined for non-positive values; mask them to NaN.
        pwt_data[variable] = np.log(levels.where(levels > 0))

# Every 10th year from 1950, plus the latest year when it is not already
# included (appending it unconditionally would draw the same curve twice).
# int() lets range() accept the value when 'Year' is stored as a float.
latest_year = int(pwt_data['Year'].max())
selected_years = list(range(1950, latest_year + 1, 10))
if latest_year not in selected_years:
    selected_years.append(latest_year)

# Filter data for the selected years
selected_data = pwt_data[pwt_data['Year'].isin(selected_years)]

# Loop through variables and create KDE plots
for variable in variables:
    # Separate figure for each variable
    fig = plt.figure(figsize=(12, 8))
    sns.set_palette("husl")

    # One density curve per selected year
    for year in selected_years:
        data_for_year = selected_data[selected_data['Year'] == year]
        # Keep finite observations only (drops NaN and +/-inf).
        data_for_year = data_for_year[np.isfinite(data_for_year[variable])]
        # A density estimate needs at least two distinct observations.
        if data_for_year[variable].nunique() < 2:
            continue
        sns.kdeplot(data=data_for_year, x=variable, label=str(year), fill=False)

    plt.title(f'Distribution of {variable} Every 10 Years')
    plt.xlabel(variable)
    plt.ylabel('Density')
    plt.legend()
    plt.grid(True)

    # Save the plot
    file_path = f'./graphs/kde_plot_{variable}_every_10_years.png'
    plt.savefig(file_path)
    print(f'Plot saved as PNG: {file_path}')

    # Show the plot (optional)
    plt.show()
    # Release the figure so figures do not accumulate across iterations.
    plt.close(fig)