import pandas as pd
import scipy as stats
from scipy.stats import linregress
import numpy as np
import matplotlib.pyplot as plt
import wbgapi as wb
import seaborn as sns
# Query the World Bank API for its list of sources and series, then load the
# Human Capital Index extract from disk and show which columns it provides.
wb.source.info()
wb.series.info()

HCI_data = pd.read_csv('HCI.csv')
print([*HCI_data.columns])
HCI_data
def linreg(table, x, y):
    """Regress column *y* on column *x* and overlay the fitted line.

    Prints the correlation coefficient and p-value reported by
    ``scipy.stats.linregress``, labels the current matplotlib axes with the
    two column names, and draws the fitted line in red on the current axes.

    Args:
        table: DataFrame containing both columns.
        x: Name of the column used as the independent variable.
        y: Name of the column used as the dependent variable.
    """
    # Only discard rows that are missing x or y. A bare ``dropna()`` would
    # also throw away rows with a NaN in any unrelated column, shrinking
    # (or emptying) the regression sample.
    paired = table.dropna(subset=[x, y])
    res = linregress(paired[x], paired[y])

    print('Correlation:', res.rvalue)
    print('P-Value:', res.pvalue)

    plt.xlabel(x)
    plt.ylabel(y)
    # The line is evaluated at every x in the table, not just the fitted rows.
    plt.plot(table[x], res.intercept + res.slope * table[x], 'r', label='fitted line')
# Scatter adult survival rate against harmonized test scores, then let
# linreg() print the fit statistics and draw the regression line on top.
HCI_data.plot(
    x='Harmonized Test Scores',
    y='Adult Survival Rate',
    kind='scatter',
    color='darkblue',
    figsize=(10, 6),
)
plt.ylabel('Adult Survival Rate')
plt.xlabel('Harmonized Test Scores')
plt.title('Correlation between Adult Survival Rate and Harmonized Test Scores')
linreg(HCI_data, 'Harmonized Test Scores', 'Adult Survival Rate')
plt.show()
# Pull World Bank indicator NY.ADJ.AEDU.GN.ZS and keep its 2010 column; it is
# stored below as 'GNI Spending on Education'.
edu_spending = wb.data.DataFrame('NY.ADJ.AEDU.GN.ZS')
percent_gni_spending = edu_spending['YR2010']

# Rows with no country code get the placeholder 0, which matches nothing in
# the World Bank index and therefore maps to NaN.
country_code = HCI_data['WB Code'].fillna(0)

# Align by country code rather than by row position: the World Bank frame has
# its own row order and extra rows, so slicing it to len(HCI_data) and
# inserting positionally attaches values to the wrong countries.
# NOTE(review): assumes 'WB Code' uses the same economy codes that index
# wbgapi's DataFrame — confirm against HCI.csv.
HCI_data.insert(3, 'GNI Spending on Education', country_code.map(percent_gni_spending))
HCI_data
wb.economy.DataFrame()
# Scatter education spending (share of GNI) against harmonized test scores
# and overlay the regression line from linreg().
HCI_data.plot(kind='scatter', x='Harmonized Test Scores', y='GNI Spending on Education', figsize=(10, 6), color='darkblue')
plt.title('Test Scores vs GNI Spending on Education')
# The x-axis label must name the column actually plotted on x; it previously
# read 'Learning-Adjusted Years of School' (copied from another chart).
plt.xlabel('Harmonized Test Scores')
plt.ylabel('GNI Spending on Education')
linreg(HCI_data, 'Harmonized Test Scores', 'GNI Spending on Education')
plt.show()
# Scatter education spending (share of GNI) against learning-adjusted years
# of school, then print the fit statistics and draw the regression line.
HCI_data.plot(
    x='Learning-Adjusted Years of School',
    y='GNI Spending on Education',
    kind='scatter',
    color='darkblue',
    figsize=(10, 6),
)
plt.ylabel('GNI Spending on Education')
plt.xlabel('Learning-Adjusted Years of School')
plt.title('Does GNI Spending Proportion Affect How Many Years Children Spend in School?')
linreg(HCI_data, 'Learning-Adjusted Years of School', 'GNI Spending on Education')
plt.show()
# http://nces.ed.gov/programs/coe/indicator/cmd#3
# Load the per-student spending workbook and rename the two unnamed columns
# used below so the sheet can be joined to HCI_data on 'WB Code'.
edu_test = pd.read_excel('edu_spending.xlsx')
edu_test
edu2 = edu_test.rename(
    columns={
        'Unnamed: 1': 'WB Code',
        'Unnamed: 4': 'Spending per Student (thousands USD)',
    }
)
edu2

# Join on country code, discard rows whose spending cell holds the literal
# string 'SE.XPD.PRIM.PC.ZS', and cast the remaining values to float.
# NOTE(review): that string looks like an indicator code left over from a
# header row in the sheet — confirm.
merged_table = pd.merge(left=HCI_data, right=edu2, on=['WB Code'])
merged_table = merged_table[
    merged_table['Spending per Student (thousands USD)'] != 'SE.XPD.PRIM.PC.ZS'
]
merged_table = merged_table.astype(
    {'Spending per Student (thousands USD)': float}
)
merged_table
# Scatter per-student spending against harmonized test scores for the merged
# countries, then print the fit statistics and draw the regression line.
merged_table.plot(
    x='Harmonized Test Scores',
    y='Spending per Student (thousands USD)',
    kind='scatter',
    color='darkblue',
    figsize=(10, 6),
)
plt.ylabel('Spending per Student (thousands USD)')
plt.xlabel('Harmonized Test Scores')
plt.title('Real Amount Spent on Each Student vs Test Scores')
linreg(merged_table, 'Harmonized Test Scores', 'Spending per Student (thousands USD)')
plt.show()
# Scatter per-student spending against learning-adjusted years of school,
# then print the fit statistics and draw the regression line.
merged_table.plot(
    x='Learning-Adjusted Years of School',
    y='Spending per Student (thousands USD)',
    kind='scatter',
    color='darkblue',
    figsize=(10, 6),
)
plt.ylabel('Spending per Student (thousands USD)')
plt.xlabel('Learning-Adjusted Years of School')
plt.title('Real Amount Spent on Each Student vs Years in School')
linreg(merged_table, 'Learning-Adjusted Years of School', 'Spending per Student (thousands USD)')
plt.show()