import xlrd
import pandas as pd
df = pd.read_excel("shipping1997.xlsx", sheet_name=None)
df
df_shipping = df['shipping']
df_states = df['states']
df_commodities = df['commodities']
df_distances = df['distances']
us_state_abbrev = {
'Alabama': 'AL',
'Alaska': 'AK',
'Arizona': 'AZ',
'Arkansas': 'AR',
'California': 'CA',
'Colorado': 'CO',
'Connecticut': 'CT',
'Delaware': 'DE',
'Florida': 'FL',
'Georgia': 'GA',
'Hawaii': 'HI',
'Idaho': 'ID',
'Illinois': 'IL',
'Indiana': 'IN',
'Iowa': 'IA',
'Kansas': 'KS',
'Kentucky': 'KY',
'Louisiana': 'LA',
'Maine': 'ME',
'Maryland': 'MD',
'Massachusetts': 'MA',
'Michigan': 'MI',
'Minnesota': 'MN',
'Mississippi': 'MS',
'Missouri': 'MO',
'Montana': 'MT',
'Nebraska': 'NE',
'Nevada': 'NV',
'New Hampshire': 'NH',
'New Jersey': 'NJ',
'New Mexico': 'NM',
'New York': 'NY',
'North Carolina': 'NC',
'North Dakota': 'ND',
'Ohio': 'OH',
'Oklahoma': 'OK',
'Oregon': 'OR',
'Pennsylvania': 'PA',
'Rhode Island': 'RI',
'South Carolina': 'SC',
'South Dakota': 'SD',
'Tennessee': 'TN',
'Texas': 'TX',
'Utah': 'UT',
'Vermont': 'VT',
'Virginia': 'VA',
'Washington': 'WA',
'West Virginia': 'WV',
'Wisconsin': 'WI',
'Wyoming': 'WY'
}
df_shipping['origin'] = df_shipping['origin'].replace(us_state_abbrev.values(), us_state_abbrev.keys())
df_shipping['dest'] = df_shipping['dest'].replace(us_state_abbrev.values(), us_state_abbrev.keys())
df_states = df_states.rename(columns={ 'Abbreviation' : 'origin' })
df_states['origin'] = df_states['origin'].replace(us_state_abbrev.values(), us_state_abbrev.keys())
df_all = pd.merge(df_shipping,df_states, on='origin')
df_all = df_all[['State', 'origin', 'dest', 'SCTG Code', 'Value($ mil)', 'Tons(000)', 'Ton-miles(mil)', 'GDP']]
df_all
df_distances['dest'] = df_distances['dest'].replace(us_state_abbrev.values(), us_state_abbrev.keys())
df_all2 = pd.merge(df_all, df_distances, on='dest')
df_all2['adjacent'] = df_all2['adjacent'].astype('bool')
df_all2