import pandas as pd
import matplotlib.pyplot as plt
from typing import Optional
# read database
sales = pd.read_csv('data/tesco_sales.csv')
competitors = pd.read_csv('data/competitor_prices.csv').dropna()
competitors["Product_id"] = competitors["Product_id"].astype('int')
competitors.Date = pd.to_datetime(competitors.Date)
sales.Date = pd.to_datetime(sales.Date)
sales['profit_per_item'] = sales["Selling_price"] - sales["Cost_Price"]
sales['daily_profit'] = sales['profit_per_item'] * sales['Sales_qty']
gp_sales = sales.groupby("Product_id")
sales.head()
competitor_products = set(competitors.Product_id.unique())
our_products = set(sales.Product_id.unique())
product_without_competitors = our_products.difference(competitor_products)
product_with_competitors = our_products.intersection(competitor_products)
len(product_with_competitors), len(product_without_competitors)
product_with_highest_sales = sales.groupby("Product_id").sum().sort_values("Selling_price",ascending=False)[:10].index
product_with_highest_sales
for i in set(product_with_highest_sales).intersection(product_without_competitors):
s1 = sales[sales.Product_id==i]
plt.scatter(s1.Selling_price,s1.daily_profit)
print('Product id: ', i)
plt.xlabel('price')
plt.ylabel('Profit')
plt.show()
Product id: 128
Product id: 97
Product id: 8
# for i in set(product_with_highest_sales).intersection(product_with_competitors):
# s1 = sales[sales.Product_id==i]
# plt.scatter(s1.Selling_price,s1.Sales_qty)
# print('Product id: ', i)
# plt.xlabel('price')
# plt.ylabel('quantity')
# plt.show()
s1['daily_profit'].cumsum().plot()
wc = 250
woc = 8
s1 = sales[sales["Product_id"]==wc].copy()
s1.set_index("Date",inplace=True)
s1.sort_index(inplace=True)
s1['daily_profit'].cumsum().plot();plt.title("Total Profit ");plt.show()
s1['Sales_qty'].plot();plt.title("# Sales");plt.show()
s1['daily_profit'].plot();plt.title("Daily Profit");plt.show()
s1['Selling_price'].plot();plt.title("Sell Price");plt.show()
s1['Cost_Price'].plot();plt.title("CostPrice");plt.show();
s1 = sales[sales["Product_id"]==woc].copy()
# s1.set_index("Sales_qty",inplace=True)
s1.sort_index(inplace=True)
# s1['daily_profit'].cumsum().plot();plt.title("Total Profit ");plt.show()
# s1['Sales_qty'].plot();plt.title("# Sales");plt.show()
# s1['daily_profit'].plot(kind='scatter');plt.title("Daily Profit");plt.show()
# s1['Selling_price'].plot();plt.title("Sell Price");plt.show()
# s1['Cost_Price'].plot();plt.title("CostPrice");plt.show();
s1.plot('Sales_qty', 'daily_profit',kind='scatter');plt.show()
s1.plot( 'Selling_price','Sales_qty',kind='scatter')
for i, gp in competitors.groupby('Competitor_id'):
break
display(gp.head())
display(sales.head())
merged = sales.merge(competitors,left_on=['Date','Product_id'], right_on=['Date','Product_id'],how='outer')
merged = merged.dropna()
merged['expensive'] = merged.Selling_price > merged.Competitor_Price
merged.groupby('expensive').sum()
merged.groupby('expensive').count()
s1.sort_values('daily_profit',ascending=False).head(20)
s1.describe()
s1
s1['sp'] = s1.Selling_price.apply(round)
plt.plot(s1.groupby('sp').sum().daily_profit,'go')
plt.plot(s1.groupby('sp').mean().daily_profit,'go')
s1.groupby('sp').sum().daily_profit.sort_values()
s1.groupby('sp').mean().daily_profit.sort_values()
s1.groupby('sp').count().daily_profit.sort_values()
s1[s1.sp==83]
1.Give an overview of the data. What issues are there?
2. Assuming we want to increase profit, provide the recommended prices for each of the products.
! pip install statsmodels pandas matplotlib seaborn
# Price optimsization
# 2. Assuming we want to increase profit, provide the recommended prices for each of the products.
"""
# Path: read.ipynb
# Price optimsization
"""
from statsmodels.formula.api import ols
import seaborn as sns
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from typing import Optional
sns.set_style()
class PriceOptimizer:
def __init__(
self, df: pd.DataFrame, p_id: int, competitors: Optional[pd.DataFrame] = None
) -> None:
self.sales = df
product_ids = df.Product_id.unique()
if not p_id in product_ids:
raise ValueError("Product id not in data")
self.p_id = p_id
self.df = df[df.Product_id == p_id].copy()
self.competitors = None
if competitors:
competitor_ids = competitors.Product_id.unique()
if p_id in competitor_ids:
self.competitors = competitors[competitors.Product_id == p_id].copy()
self.competitors = self.competitors.drop(columns=["Product_id"])
# most of time its a constant
self.cost_price = self.df.Cost_Price.mean()
self.verbose = False
self.params = self.demand_curve()
self.intercept = self.params.Intercept
self.slope = self.params.Selling_price
def demand_curve(self) -> pd.DataFrame:
"""
Returns the demand curve parameters
"""
# fit ols model
model = ols(formula="Sales_qty ~ Selling_price", data=self.df).fit()
# extract parameters
params = model.params
if self.verbose:
print(model.summary())
return params
def demand(self, price: float) -> float:
"""
Returns the demand for a given price
"""
return self.intercept + self.slope * price
def revenue(self, p: float) -> float:
"""
Returns the revenue for a given quantity and selling price
"""
q = self.demand(p)
return q * p
def profit(self, p: float) -> float:
"""
Returns the profit for a given quantity and selling price
"""
q = self.demand(p)
return self.revenue(p) - self.cost_price
def profit_maximizer(self, plot: bool =False) -> float:
"""
Optimizes the price for the product
"""
# price range
price_range = np.linspace(0, self.df.Selling_price.max()*1.2, 100)
# profit for each price
profit = [self.profit(p) for p in price_range]
# optimal price
opt_price = price_range[np.argmax(profit)]
if plot:
sns.lineplot(x=price_range, y=profit)
# vline
plt.axvline(x=opt_price, color="red")
plt.title("Profit maximizer")
plt.ylabel = "Profit"
plt.xlabel = "Selling price"
plt.show()
return opt_price
def plot_demand_curve(self) -> None:
"""
Plots the demand curve
"""
sns.lmplot(x="Selling_price", y="Sales_qty", data=self.df, fit_reg=True, size=4)
plt.show()
sales = pd.read_csv('data/tesco_sales.csv')
competitors = pd.read_csv('data/competitor_prices.csv').dropna()
po = PriceOptimizer(sales, 120)
po.intercept
OLS Regression Results
==============================================================================
Dep. Variable: Sales_qty R-squared: 0.049
Model: OLS Adj. R-squared: 0.041
Method: Least Squares F-statistic: 6.463
Date: Mon, 26 Jul 2021 Prob (F-statistic): 0.0122
Time: 13:07:20 Log-Likelihood: -190.87
No. Observations: 128 AIC: 385.7
Df Residuals: 126 BIC: 391.4
Df Model: 1
Covariance Type: nonrobust
=================================================================================
coef std err t P>|t| [0.025 0.975]
---------------------------------------------------------------------------------
Intercept 3.5407 0.754 4.699 0.000 2.049 5.032
Selling_price -0.0433 0.017 -2.542 0.012 -0.077 -0.010
==============================================================================
Omnibus: 73.145 Durbin-Watson: 1.739
Prob(Omnibus): 0.000 Jarque-Bera (JB): 261.594
Skew: 2.193 Prob(JB): 1.57e-57
Kurtosis: 8.460 Cond. No. 348.
==============================================================================
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
po.plot_demand_curve()
po.profit_maximizer(plot=True)
/shared-libs/python3.7/py/lib/python3.7/site-packages/seaborn/regression.py:580: UserWarning: The `size` parameter has been renamed to `height`; please update your code.
warnings.warn(msg, UserWarning)
product_ids = sales.Product_id.unique()
# To see price for each product change verbose to True.
verbose = False
prices = {}
for p_id in product_ids:
po = PriceOtipmizer(sales, p_id)
price = po.profit_maximizer()
profit = po.profit(price)
prices[p_id] = {'price': price, 'profit': profit}
if verbose:
print(f"Optimal Price for product #{p_id}: {price}. profit:{profit}")