import numpy as np
import pandas as pd
# Coefficients carried over from Problem 1. The revenue functions below use
# beta[0] as an additive constant and beta[1:] as per-column weights.
beta = [
    -1.74695522, 0.40816854, 0.10884091, 0.10102536, 0.02222656,
    0.04382357, -0.06633965, -1.3309358, 0.15952172,
]

# Reference data set; its column statistics are used to standardize the
# assortment files.
data = pd.read_csv('data.csv')

# Notebook-style inspection expressions (no effect when run as a script).
data.shape
data.head()
data.columns

features = [
    'prop_starrating',
    'prop_review_score',
    'prop_brand_bool',
    'prop_location_score',
    'prop_accesibility_score',
    'prop_log_historical_price',
    'price_usd',
    'promotion_flag',
    'srch_booking_window',
    'srch_adults_count',
    'srch_children_count',
    'srch_room_count',
    'srch_saturday_night_bool',
]

# Per-column mean and standard deviation of the reference data, keyed by
# column name.
mean = {column: data[column].mean() for column in features}
std = {column: data[column].std() for column in features}

data1 = pd.read_csv('data1.csv')
data1.columns

# Columns that find_optimal() standardizes before scoring an assortment.
new_features = [
    'prop_starrating',
    'prop_review_score',
    'prop_brand_bool',
    'prop_location_score',
    'prop_accesibility_score',
    'prop_log_historical_price',
    'price_usd',
    'promotion_flag',
]
def revenue(assortment, beta):
    """Return the expected revenue of showing every row of *assortment*.

    Each row gets a utility ``u = beta[0] + sum(beta[j] * row[j - 1])`` over
    the first ``len(beta) - 1`` columns (taken by position) and a weight
    ``v = exp(u)``.  The result is ``sum(v * p) / (1 + sum(v))``, where ``p``
    is the value in the second-to-last column of the row -- the form of a
    multinomial-logit expected revenue with a no-purchase weight of 1.

    Args:
        assortment: DataFrame with one row per product.  Columns are read by
            position, so the column order must match the order of
            ``beta[1:]``, and the second-to-last column is used as the price.
        beta: sequence of coefficients, constant term first.

    Returns:
        The expected revenue as a float; ``0.0`` for an empty assortment.
    """
    coefficients = np.asarray(beta, dtype=float)
    n_features = coefficients.size - 1

    # Slice by position on the frame itself.  Indexing a row Series with a
    # bare integer (``assortment.iloc[i][j]``) relies on the positional
    # fallback of Series.__getitem__, which pandas has deprecated.
    feature_matrix = assortment.iloc[:, :n_features].to_numpy(dtype=float)
    prices = assortment.iloc[:, -2].to_numpy(dtype=float)

    weights = np.exp(coefficients[0] + feature_matrix @ coefficients[1:])
    # The leading 1 in the denominator is the weight of choosing nothing.
    return float(weights @ prices / (1.0 + weights.sum()))
def find_optimal(df):
    """Find the best "top-k by price" assortment and report its size.

    The products are ranked by ``price_usd`` in descending order, every
    prefix of that ranking is scored with ``revenue(prefix, beta)``, and the
    prefix size(s) reaching the maximum are printed.

    Side effect: the columns listed in ``new_features`` are standardized
    IN PLACE on the caller's frame using the module-level ``mean`` / ``std``.
    ``expected_revenue`` relies on receiving a frame that has already been
    standardized this way, so the mutation is kept; calling this function
    twice on the same frame standardizes it twice.

    NOTE(review): ``price_usd`` is among the standardized columns, so if
    ``revenue`` reads its price from that column the prefixes are ranked by
    revenue in standardized price units, whereas ``expected_revenue``
    converts the price back to original units -- confirm this is intended.

    Args:
        df: DataFrame containing every column named in ``new_features``.

    Returns:
        The smallest prefix size that attains the maximum revenue.

    Raises:
        ValueError: if *df* has no rows.
    """
    if df.shape[0] == 0:
        # Previously this surfaced as "max() arg is an empty sequence".
        raise ValueError("find_optimal() needs at least one product")

    for feature in new_features:
        df[feature] = (df[feature] - mean[feature]) / std[feature]

    # sort_values returns a new frame; the caller's row order is untouched.
    ranked = df.sort_values(["price_usd"], ascending=False).reset_index(drop=True)

    revenue_list = [
        revenue(ranked.head(size), beta)
        for size in range(1, ranked.shape[0] + 1)
    ]
    maximum = max(revenue_list)

    best_size = None
    for size, value in enumerate(revenue_list, start=1):
        if value == maximum:
            print('The optimal assortment is a display of ', size, ' highest priced products.')
            if best_size is None:
                best_size = size
    return best_size
# Search the price-ordered assortments of data1 (also standardizes data1's
# feature columns in place).
find_optimal(data1)
# Recorded output:
# The optimal assortment is a display of 4 highest priced products.
def expected_revenue(num, df):
    """Print and return the expected revenue of the *num* highest-priced rows.

    *df* is expected to carry standardized feature columns (as left behind by
    ``find_optimal``); the price is converted back to its original scale with
    the module-level ``mean['price_usd']`` and ``std['price_usd']`` before it
    is weighted.

    Args:
        num: number of highest-priced products to display.
        df: DataFrame with a ``price_usd`` column.  The first
            ``len(beta) - 1`` columns (by position) are used as features and
            the second-to-last column as the standardized price.

    Returns:
        The expected revenue, ``sum(v * p) / (1 + sum(v))`` with
        ``v = exp(beta[0] + features . beta[1:])``.
    """
    # Rank by price first: find_optimal sorts only a local copy, so the
    # caller's frame is still in file order and a bare head(num) would not
    # necessarily pick the highest-priced products.
    optimal_assortment = df.sort_values(["price_usd"], ascending=False).head(num)

    coefficients = np.asarray(beta, dtype=float)
    n_features = coefficients.size - 1

    # Positional slicing on the frame avoids the deprecated integer lookup on
    # a label-indexed row Series (``frame.iloc[i][j]``).
    feature_matrix = optimal_assortment.iloc[:, :n_features].to_numpy(dtype=float)
    weights = np.exp(coefficients[0] + feature_matrix @ coefficients[1:])

    standardized_price = optimal_assortment.iloc[:, -2].to_numpy(dtype=float)
    prices = standardized_price * std['price_usd'] + mean['price_usd']

    # The leading 1 in the denominator is the weight of choosing nothing.
    value = float(weights @ prices / (1.0 + weights.sum()))
    print('expected revenue is', value)
    return value
# Each assortment size passed to expected_revenue() below is the size that
# find_optimal() printed for the same data set in the recorded run.
expected_revenue(4, data1)
# Recorded output:
# expected revenue is 60.314832206297794

data2 = pd.read_csv('data2.csv')
find_optimal(data2)
# Recorded output:
# The optimal assortment is a display of 3 highest priced products.
expected_revenue(3, data2)
# Recorded output:
# expected revenue is 86.64853486051382

data3 = pd.read_csv('data3.csv')
data3.shape
find_optimal(data3)
# Recorded output:
# The optimal assortment is a display of 9 highest priced products.
expected_revenue(9, data3)
# Recorded output:
# expected revenue is 99.7909184869044

data4 = pd.read_csv('data4.csv')
data4.shape
find_optimal(data4)
# Recorded output:
# The optimal assortment is a display of 1 highest priced products.
expected_revenue(1, data4)
# Recorded output:
# expected revenue is 29.699069888307854