# Day 9 - Network data exercise
# Jame Zou | MA346-SN1F
import pandas as pd
# Read the four worksheets of the shipping workbook.
# The workbook is opened once through pd.ExcelFile and every sheet is parsed
# from that single handle, instead of re-opening and re-parsing the same
# .xlsx file for each pd.read_excel call. The resulting frames are unchanged.
with pd.ExcelFile("shipping1997.xlsx") as workbook:
    df_ship = pd.read_excel(workbook)  # default sheet_name=0 -> first sheet
    df_states = pd.read_excel(workbook, sheet_name=1)  # has 'State' / 'Abbreviation' columns
    df_comm = pd.read_excel(workbook, sheet_name=2)
    df_dist = pd.read_excel(workbook, sheet_name=3)  # has 'origin' / 'dest' / 'adjacent' columns
# Lookup table: state abbreviation -> full state name, taken from df_states.
abbr = df_states.set_index('Abbreviation')['State'].to_dict()
# Work on a copy so df_ship itself keeps the original abbreviations, and
# swap abbreviations for full names in both endpoint columns in one pass.
# Values that are not keys of `abbr` are left untouched by replace().
df_ship2 = df_ship.copy().replace({'origin': abbr, 'dest': abbr})
# Preview the first rows (only displayed when run as a notebook cell).
df_ship2.head()
# Cast df_dist's 'adjacent' column to boolean dtype, in place on df_dist.
# NOTE(review): astype(bool) turns NaN and every non-empty string into True;
# presumably the column holds only 0/1 values -- confirm against the sheet.
df_dist['adjacent'] = df_dist['adjacent'].astype(bool)
# Preview the first rows (only displayed when run as a notebook cell).
df_dist.head()
# Attach the df_dist columns to every shipment row, matching on the
# (origin, dest) pair. A left join keeps all rows of df_ship; rows with no
# matching pair in df_dist get NaN in the columns coming from df_dist.
# NOTE(review): this joins df_ship (abbreviations), not df_ship2 (full names);
# presumably df_dist is keyed by abbreviations as well -- confirm.
df_ship_dist = df_ship.merge(df_dist, how='left', on=['origin', 'dest'])
# Preview the first rows (only displayed when run as a notebook cell).
df_ship_dist.head()