#import excel file and all its sheets
import pandas as pd
df = pd.read_excel('shipping1997.xlsx', sheet_name=None)
#check the sheetname names
df.keys()
#check the dataframe and observe the adjacent column has an integer datatype
df['distances'].head()
#convert adjacent column datatype to boolean
df['distances']['adjacent'] = df['distances']['adjacent'].astype(bool)
df['distances'].head()
#merge the shipping and distances dataframe where they are matched on the contents of their origin and dest columns
df_merged_distance_shipping = pd.merge(df['shipping'], df['distances'],
on = ['origin','dest'] )
df_merged_distance_shipping.head()
#merge the earlier merged dataframe with the states dataframe to be able to
#use the full state name instead of state abbreviation in the origin column
df_merged = pd.merge(df_merged_distance_shipping, df['states'],
left_on='origin', right_on='Abbreviation' )
#drop columns that are not relevant and are no longer needed for the assignment
df_merged.drop(['origin','Abbreviation','GDP'], inplace=True, axis=1)
df_merged = df_merged.rename(columns = {'State':'Origin'})
#merge the dataframe with the states dataframe again to have it merged
#on dest column in states this time to have those abbreviations matched
#with their corresponding full state name
df_merged = pd.merge(df_merged, df['states'],
left_on='dest', right_on='Abbreviation' )
df_merged.drop(['dest','Abbreviation','GDP'], inplace=True, axis=1)
df_merged = df_merged.rename(columns = {'State':'Destination'})
#move the Origin and Destination column to first and second position for easier viewing
dest="Destination"
dest_col = df_merged.pop(dest)
df_merged.insert(0, dest, dest_col)
origin = "Origin"
origin_col = df_merged.pop(origin)
df_merged.insert(0, origin, origin_col)
df_merged