Data extraction from World bank data on Top 6 Economies

import pandas as pd

#list of required fields need to be extracted from the raw csv file req_cols=['Country Name','Year','GDP (current US$)','GDP, PPP (current international $)', 'GDP per capita (current US$)','GDP growth (annual %)','Imports of goods and services (% of GDP)', 'Exports of goods and services (% of GDP)','Central government debt, total (% of GDP)', 'Total reserves (includes gold, current US$)', 'Unemployment, total (% of total labor force) (modeled ILO estimate)', 'Inflation, consumer prices (annual %)', 'Personal remittances, received (% of GDP)', 'Population, total','Population growth (annual %)', 'Life expectancy at birth, total (years)', 'Poverty headcount ratio at $1.90 a day (2011 PPP) (% of population)']

def preprocessed_df(csvfile): #load csv file as pandas dataframe df=pd.read_csv(csvfile) #select the country name column as pandas series country_name=pd.Series(df["Country Name"].iloc[33:63]) #Renaming "Indicator Name" column into "Year" df.rename(columns = {'Indicator Name':'Year'}, inplace = True) #Droping "Country Name" and "Country Code" columns after that transposing the whole dataframe using transpose() country_df = df.drop(["Country Name","Country Code"],axis=1).transpose() #Setting the first row of the dataframe as row header country_df.reset_index(inplace=True) new_header = country_df.iloc[0] country_df.columns = new_header #The raw csv file contain data from 1960 but the required data are from 1990 to 2020.Here selecting the required years final_df=country_df.iloc[33:63] # Finally concating the country_name series with the final_df dataframe using pandas concat() function return pd.concat([country_name,final_df],axis=1)

usa=pd.read_csv("USA.csv") usa.head(6)

usa=preprocessed_df(csvfile="USA.csv") usa[req_cols].head(6)

usa=preprocessed_df(csvfile="USA.csv") china=preprocessed_df(csvfile="China.csv") japan=preprocessed_df(csvfile="Japan.csv") germany=preprocessed_df(csvfile="Germany.csv") united_kingdom=preprocessed_df(csvfile="United_Kingdom.csv") india=preprocessed_df(csvfile="India.csv")

#List of required countries names countries=[usa,china,japan,germany,united_kingdom,india] top_six=pd.concat([country[req_cols] for country in countries],axis=0)

top_six

top_six.shape

top_six.to_csv('top_six_economies.csv')