Lesson 14 - Merging datasets

import pandas as pd data="https://raw.githubusercontent.com/austinlasseter/hosting_some_files/main/pandas_files/eu-govt-bonds.tsv" df = pd.read_csv(data, sep="\t") df.head()

Run to view results

df.columns

Run to view results

df.columns = [col.strip() for col in df.columns] df.columns

Run to view results

df['country_code']=df.iloc[:,0].str[-2:] df.drop(['int_rt,geo\\time'], axis=1, inplace = True) df.head()

Run to view results

df.dtypes # uh oh, objects (strings)

Run to view results

# find non-numeric data df[df['2019M02'].str.contains("[a-zA-Z]")]

Run to view results

# drop row that contains letters iso numbers df.drop(df[df['2019M02'].str.contains("[a-zA-Z]")].index, inplace=True)

Run to view results

df.head()

Run to view results

# convert data to numeric df[df.columns[:-1]] = df[df.columns[:-1]].apply(pd.to_numeric, errors='coerce')

Run to view results

df.dtypes

Run to view results

df.head()

Run to view results

url="https://raw.githubusercontent.com/austinlasseter/hosting_some_files/main/pandas_files/country-codes.csv"

Run to view results

df_codes = pd.read_csv(url, encoding="latin-1") df_codes.head()

Run to view results

df = pd.merge(df, df_codes[['Alpha-2 code','Country']], left_on ='country_code', right_on = 'Alpha-2 code', how = 'left').drop(columns = ['Alpha-2 code']) df.head()

Run to view results

df[df['Country'].isnull()][['country_code','Country']]

Run to view results

df.loc[df['country_code'] == 'UK', 'country_code'] = 'GB' df[29:]['country_code']

Run to view results

df[df['2020M01']==df['2020M01'].max()]['Country']

Run to view results

df[df['2020M01']==df['2020M01'].min()]['Country']

Run to view results

df['diff'] = abs(df['2020M01'] - df['2019M02']) df.sort_values('diff').head(1)

Run to view results

df.to_csv("eu-govt-bonds-clean.csv", index=False)

Run to view results

df_t = df.copy().T df_t.columns = df_t.loc['Country'] df_t.drop(['country_code','Country','diff'], axis=0, inplace=True) df_t.mean(axis=0)

Run to view results

df_t.median(axis=0)

Run to view results

#copied from solutions df_stacked = ( df .set_index(["country_code", "Country"]) .stack() # this puts all the month columns in one column .reset_index() # convert the resulting Series to a DataFrame .rename(columns={"level_2": "month", 0: "rate"}) # rename the resulting default column names ) df_stacked.head()

Run to view results

df_stacked.groupby("Country")["rate"].agg(["mean", "median"])

Run to view results