import pandas as pd
data="https://raw.githubusercontent.com/austinlasseter/hosting_some_files/main/pandas_files/eu-govt-bonds.tsv"
df = pd.read_csv(data, sep="\t")
df.head()
Run to view results
df.columns
Run to view results
df.columns = [col.strip() for col in df.columns]
df.columns
Run to view results
df['country_code']=df.iloc[:,0].str[-2:]
df.drop(['int_rt,geo\\time'], axis=1, inplace = True)
df.head()
Run to view results
df.dtypes
# uh oh, objects (strings)
Run to view results
# find non-numeric data
df[df['2019M02'].str.contains("[a-zA-Z]")]
Run to view results
# drop row that contains letters iso numbers
df.drop(df[df['2019M02'].str.contains("[a-zA-Z]")].index, inplace=True)
Run to view results
df.head()
Run to view results
# convert data to numeric
df[df.columns[:-1]] = df[df.columns[:-1]].apply(pd.to_numeric, errors='coerce')
Run to view results
df.dtypes
Run to view results
df.head()
Run to view results
url="https://raw.githubusercontent.com/austinlasseter/hosting_some_files/main/pandas_files/country-codes.csv"
Run to view results
df_codes = pd.read_csv(url, encoding="latin-1")
df_codes.head()
Run to view results
df = pd.merge(df, df_codes[['Alpha-2 code','Country']], left_on ='country_code', right_on = 'Alpha-2 code', how = 'left').drop(columns = ['Alpha-2 code'])
df.head()
Run to view results
df[df['Country'].isnull()][['country_code','Country']]
Run to view results
df.loc[df['country_code'] == 'UK', 'country_code'] = 'GB'
df[29:]['country_code']
Run to view results
df[df['2020M01']==df['2020M01'].max()]['Country']
Run to view results
df[df['2020M01']==df['2020M01'].min()]['Country']
Run to view results
df['diff'] = abs(df['2020M01'] - df['2019M02'])
df.sort_values('diff').head(1)
Run to view results
df.to_csv("eu-govt-bonds-clean.csv", index=False)
Run to view results
df_t = df.copy().T
df_t.columns = df_t.loc['Country']
df_t.drop(['country_code','Country','diff'], axis=0, inplace=True)
df_t.mean(axis=0)
Run to view results
df_t.median(axis=0)
Run to view results
#copied from solutions
df_stacked = (
df
.set_index(["country_code", "Country"])
.stack() # this puts all the month columns in one column
.reset_index() # convert the resulting Series to a DataFrame
.rename(columns={"level_2": "month", 0: "rate"}) # rename the resulting default column names
)
df_stacked.head()
Run to view results
df_stacked.groupby("Country")["rate"].agg(["mean", "median"])
Run to view results