import pandas as pd
# Notebook-only setup: the leading "!" is IPython/Jupyter shell-escape syntax,
# so these two lines are not valid in a plain Python script.
# xlrd and openpyxl are Excel-reading engines that pandas.read_excel can use;
# openpyxl is the engine for .xlsx workbooks.
!pip install xlrd
!pip install openpyxl
# Load the "OECD.Stat export" worksheet of the GDP-per-capita workbook
# straight from its URL.
percapitaGDP = pd.read_excel(
    "https://github.com/PacktPublishing/Python-Data-Cleaning-Cookbook/raw/master/Chapter01/data/GDPpercapita.xlsx",
    sheet_name="OECD.Stat export",
    usecols="A,C:T",  # keep spreadsheet column A and columns C through T
    skiprows=4,       # ignore the first four rows of the sheet
    skipfooter=1,     # ignore the last row of the sheet
)

# Quick look at what was loaded: first rows, then column dtypes and
# non-null counts.
percapitaGDP.head()
percapitaGDP.info()
# The first column arrives labelled "Year"; relabel it "metro".
percapitaGDP.rename({"Year": "metro"}, axis="columns", inplace=True)

# Trim leading and trailing whitespace from every metro label.
percapitaGDP["metro"] = percapitaGDP["metro"].str.strip()

# Inspect the column dtypes after the clean-up.
percapitaGDP.dtypes
# Convert every column after the first to a numeric dtype and give it a
# "pcGDP" prefix.
value_cols = percapitaGDP.columns[1:]
for col in value_cols:
    # errors='coerce' turns any cell that cannot be parsed as a number into
    # NaN instead of raising.
    percapitaGDP[col] = pd.to_numeric(percapitaGDP[col], errors='coerce')

# Rename all converted columns in one call rather than once per column.
# str() keeps the prefixing working even if a header label was parsed as a
# number rather than as text.
percapitaGDP.rename(
    columns={col: 'pcGDP' + str(col) for col in value_cols},
    inplace=True,
)

# Inspect the column dtypes after the conversion.
percapitaGDP.dtypes
# Summary statistics before removing empty rows.
percapitaGDP.describe()

# Drop rows in which every column after the first is missing; rows with at
# least one value in those columns are kept.
columns_after_first = percapitaGDP.columns[1:]
percapitaGDP.dropna(how='all', subset=columns_after_first, inplace=True)

# Summary statistics and a preview after the drop.
percapitaGDP.describe()
percapitaGDP.head()
# Compare the number of non-null metro labels with the number of distinct
# ones; the comparison is True when no label is repeated.
percapitaGDP["metro"].count() == percapitaGDP["metro"].nunique()

# Make the metro label the row index so rows can be looked up by name.
percapitaGDP.set_index(keys='metro', inplace=True)
percapitaGDP.head()

# Look up a single row by its index label.
percapitaGDP.loc['AUS01: Greater Sydney']