Homework
import pandas as pd
data = pd.read_excel('shipping1997.xlsx', sheet_name=None)
shipping = data['shipping']
states = data['states']
# Function to replace abbreviations with state names & remove unnecessary columns produced from merge
def cleanup_merge(column):
global new_shipping
new_shipping[column] = new_shipping['State']
new_shipping = new_shipping.drop(['State', 'Abbreviation', 'GDP'], axis=1)
return new_shipping
# Using function to rewrite the 'origin' and 'dest' columns
new_shipping = pd.merge(shipping, states, left_on='origin', right_on='Abbreviation', how='left')
new_shipping = cleanup_merge('origin')
new_shipping = pd.merge(new_shipping, states, left_on='dest', right_on='Abbreviation', how='left')
new_shipping = cleanup_merge('dest')
# Adjusting the 'adjacent' column to a series of Boolean values for more clarity of it's meaning
distances = data['distances']
distances['adjacent'] = distances['adjacent'].astype(bool)
# Adding the 'Distance(km)' and 'adjacent' columns to the shipping table created earlier
new_shipping[['distance', 'adjacent']] = distances[['Distance(km)', 'adjacent']].copy()
print(new_shipping.head(100))
origin dest SCTG Code Value($ mil) Tons(000) Ton-miles(mil) \
0 Alaska Alaska 0 5376 17448 2624
1 Alaska Alaska 1 - - -
2 Alaska Alaska 2 - - -
3 Alaska Alaska 3 43 25 3
4 Alaska Alaska 4 S S S
.. ... ... ... ... ... ...
95 Alaska Arkansas 9 - - -
96 Alaska Arkansas 10 - - -
97 Alaska Arkansas 11 - - -
98 Alaska Arkansas 12 - - -
99 Alaska Arkansas 13 - - -
distance adjacent
0 64.091910 True
1 4581.716256 False
2 4036.346720 False
3 3217.304887 False
4 2387.072321 False
.. ... ...
95 1781.947912 False
96 3492.941049 False
97 1224.566697 False
98 789.706516 False
99 1913.137493 False
[100 rows x 8 columns]