import pandas as pd
# Load the tab-separated government bond table from the hosted copy.
url = "https://raw.githubusercontent.com/austinlasseter/hosting_some_files/main/pandas_files/eu-govt-bonds.tsv"
df = pd.read_csv(url, sep="\t")
df.head()

# Probe the header both without and with a trailing space before normalising
# the column names below.
"2020M01" in df.columns
"2020M01 " in df.columns
df.columns

# Remove surrounding whitespace from every column name.
df.columns = df.columns.str.strip()
print(df.columns)
df["2020M01"].head()

# The last two characters of the combined key column are kept as the country
# code; the key column itself is then discarded.
key_col = "int_rt,geo\\time"
df["country_code"] = df[key_col].str.slice(-2)
df = df.drop(columns=key_col)
df.head()
df.columns
df.dtypes

# Inspect both ends of one month's values after sorting.
feb_2019_sorted = df["2019M02"].sort_values()
feb_2019_sorted.head()
feb_2019_sorted.tail()
# Load the country-code reference table (Latin-1 encoded).
url2 = "https://raw.githubusercontent.com/austinlasseter/hosting_some_files/main/pandas_files/country-codes.csv"
df_codes = pd.read_csv(url2, encoding="latin-1")
df_codes.head()
print(df_codes.shape[0])

# Left join so every bond row is kept even when its code has no match in the
# reference table; the row count is printed to compare against the input.
df_merged = pd.merge(
    df,
    df_codes,
    how="left",
    left_on="country_code",
    right_on="Alpha-2 code",
)
print(df_merged.shape[0])

# The reference table's code columns are no longer needed after the join.
df_merged = df_merged.drop(columns=["Alpha-2 code", "Alpha-3 code"])
df_merged.head()
# 7. Check for missing data: is there any missing data in any of the columns? Which country codes could not be matched to a row in the reference file?
# Count missing values per column.
df_merged.isna().sum()

# Rows whose country code found no match in the reference table.
unmatched = df_merged["Country"].isna()
df_merged[unmatched]

# Fill in the country name for the "UK" code by hand.
# NOTE(review): only "UK" is patched here; confirm the unmatched rows above
# contain no other non-ISO codes (Eurostat uses e.g. "EL" for Greece).
is_uk = df_merged["country_code"].eq("UK")
df_merged.loc[is_uk, "Country"] = "United Kingdom"
df_merged["country_code"].head()
# Rank countries by their January 2020 rate.
# The column is coerced to numeric first: if it was read as text, sorting
# would be lexicographic and idxmin/idxmax would compare strings. Entries that
# cannot be parsed become NaN and sort to the end. This works on a copy, so
# df_merged itself is not modified here.
jan_rates = (
    df_merged[["Country", "2020M01"]]
    .assign(**{"2020M01": pd.to_numeric(df_merged["2020M01"], errors="coerce")})
    .sort_values("2020M01")
)
print(jan_rates.head())
print(jan_rates.tail())

# Country with the lowest and the highest January 2020 rate.
jan_rates.set_index("Country").agg(["idxmin", "idxmax"])
# Coerce the two months being compared to numeric; values that cannot be
# parsed become NaN.
compared_months = ["2020M01", "2019M02"]
df_merged[compared_months] = df_merged[compared_months].apply(
    pd.to_numeric, errors="coerce"
)

# Absolute change between February 2019 and January 2020.
df_merged["diff"] = (df_merged["2020M01"] - df_merged["2019M02"]).abs()

# Row with the smallest absolute change.
df_merged.sort_values("diff").head(1)

# Persist the cleaned table without the positional index.
# NOTE(review): "cleanes" looks like a typo for "cleaned"; the filename is left
# unchanged in case something else reads this file by name.
df_merged.to_csv("eu-govt-bonds-cleanes.csv", index=False)
import pandas as pd
# Assumes df_merged is the merged bond table built earlier in this script.
# Relabel "Country" as "<country_code> - <country name>".
df_merged["Country"] = df_merged["country_code"] + " - " + df_merged["Country"]

# Only the monthly columns hold rates. Identifier columns and the derived
# "diff" column must stay out of the statistics and out of the reshape.
# NOTE(review): assumes every monthly column is named like "2020M01" — confirm.
rate_cols = df_merged.filter(regex=r"^\d{4}M\d{2}$").columns

# Wide table of numeric rates keyed by (country_code, Country). Entries that
# cannot be parsed become NaN, which mean/median skip. "Country" is kept as a
# regular column of df_merged (no in-place set_index) so it can still be used
# as a key here and below.
rates = (
    df_merged
    .set_index(["country_code", "Country"])[rate_cols]
    .apply(pd.to_numeric, errors="coerce")
)

# Mean and median across all months for each row, indexed by the "Country" label.
result = rates.droplevel("country_code").agg(["mean", "median"], axis=1)
print(result)
df_merged.columns
jan_rates["Country"].head()

# Row-wise mean keyed by both identifiers.
rates.mean(axis=1)

# Long format: one row per (country_code, Country, month) with its rate.
df_staked = (
    rates
    .stack()
    .reset_index()
    .rename(columns={"level_2": "month", 0: "rate"})
)
df_staked.head()

# Per-country mean and median computed from the long table.
df_staked.groupby("Country")["rate"].agg(["mean", "median"])