import pandas as pd
#list of required fields need to be extracted from the raw csv file
req_cols=['Country Name','Year','GDP (current US$)','GDP, PPP (current international $)',
'GDP per capita (current US$)','GDP growth (annual %)','Imports of goods and services (% of GDP)',
'Exports of goods and services (% of GDP)','Central government debt, total (% of GDP)',
'Total reserves (includes gold, current US$)',
'Unemployment, total (% of total labor force) (modeled ILO estimate)',
'Inflation, consumer prices (annual %)',
'Personal remittances, received (% of GDP)',
'Population, total','Population growth (annual %)',
'Life expectancy at birth, total (years)',
'Poverty headcount ratio at $1.90 a day (2011 PPP) (% of population)']
def preprocessed_df(csvfile):
#load csv file as pandas dataframe
df=pd.read_csv(csvfile)
#select the country name column as pandas series
country_name=pd.Series(df["Country Name"].iloc[33:63])
#Renaming "Indicator Name" column into "Year"
df.rename(columns = {'Indicator Name':'Year'}, inplace = True)
#Droping "Country Name" and "Country Code" columns after that transposing the whole dataframe using transpose()
country_df = df.drop(["Country Name","Country Code"],axis=1).transpose()
#Setting the first row of the dataframe as row header
country_df.reset_index(inplace=True)
new_header = country_df.iloc[0]
country_df.columns = new_header
#The raw csv file contain data from 1960 but the required data are from 1990 to 2020.Here selecting the required years
final_df=country_df.iloc[33:63]
# Finally concating the country_name series with the final_df dataframe using pandas concat() function
return pd.concat([country_name,final_df],axis=1)
usa=pd.read_csv("USA.csv")
usa.head(6)
usa=preprocessed_df(csvfile="USA.csv")
usa[req_cols].head(6)
usa=preprocessed_df(csvfile="USA.csv")
china=preprocessed_df(csvfile="China.csv")
japan=preprocessed_df(csvfile="Japan.csv")
germany=preprocessed_df(csvfile="Germany.csv")
united_kingdom=preprocessed_df(csvfile="United_Kingdom.csv")
india=preprocessed_df(csvfile="India.csv")
#List of required countries names
countries=[usa,china,japan,germany,united_kingdom,india]
top_six=pd.concat([country[req_cols] for country in countries],axis=0)
top_six
top_six.shape
top_six.to_csv('top_six_economies.csv')