# Importing the pandas and plotly modules
import pandas as pd
import plotly.express as px
# Reading in the sales data from 'sales_data.csv'; the 'date' column is
# parsed into datetimes at load time (parse_dates) instead of staying text
df = pd.read_csv('sales_data.csv', parse_dates=['date'])
# Take a look at the first data points (head() returns the first 5 rows by default)
df.head()
datedatetime64[ns]
warehouseobject
0
2021-06-01 00:00:00
Central
1
2021-06-01 00:00:00
North
2
2021-06-01 00:00:00
North
3
2021-06-01 00:00:00
North
4
2021-06-01 00:00:00
Central
# Print a structural summary of the frame: index range, column names,
# non-null counts, dtypes and approximate memory usage
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 8 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 date 1000 non-null datetime64[ns]
1 warehouse 1000 non-null object
2 client_type 1000 non-null object
3 product_line 1000 non-null object
4 quantity 1000 non-null int64
5 unit_price 1000 non-null float64
6 total 1000 non-null float64
7 payment 1000 non-null object
dtypes: datetime64[ns](1), float64(2), int64(1), object(4)
memory usage: 62.6+ KB
# Summary statistics for every column, not only the numeric ones (include='all').
# datetime_is_numeric=True asks pandas to summarise the datetime column with
# mean/min/quantiles/max rather than as a categorical.
# NOTE(review): this keyword was removed in pandas 2.0, where datetimes are
# always summarised numerically; on pandas >= 2.0 the call raises TypeError and
# the argument should be dropped -- confirm the pandas version this runs on.
df.describe(include='all', datetime_is_numeric=True)
dateobject
10009.1%
6 others54.5%
Missing36.4%
warehouseobject
10009.1%
3 others27.3%
Missing63.6%
count
1000
1000
unique
nan
3
top
nan
Central
freq
nan
480
mean
2021-07-15 00:05:45.600000
nan
min
2021-06-01 00:00:00
nan
25%
2021-06-24 00:00:00
nan
50%
2021-07-14 00:00:00
nan
75%
2021-08-06 00:00:00
nan
max
2021-08-28 00:00:00
nan
# What are the total sales for each payment method?
# With a y column given, px.histogram aggregates y per x category (sum is the
# default histfunc in that case), so each bar is the summed 'total' for one
# payment method, coloured by client type.
fig = px.histogram(
    df,
    x='payment',
    y='total',
    color='client_type',
    # text_auto=True,
    title='Total sales by payment method',  # fixed typo: was 'mothod'
    labels={
        'client_type': 'Client',
        'payment': 'Payment method',
    },
)
fig.show()
# What is the average unit price for each product line?
# One row per (product line, client type) pair holding the mean unit price,
# rounded to two decimals; as_index=False keeps the group keys as columns
# so they can be referenced by name when plotting.
avg_unit = (
    df.groupby(['product_line', 'client_type'], as_index=False)[['unit_price']]
    .mean()
    .round(2)
)
# Grouped bars: the client types sit side by side within each product line
fig = px.bar(
    data_frame=avg_unit,
    x='product_line',
    y='unit_price',
    color='client_type',
    # text_auto=True,
    title='Average unit price by product line',
    barmode='group',
    labels=dict(
        client_type='Client',
        product_line='Product line',
        unit_price='Avg. unit price',
    ),
)
fig.show()
# Share of total sales versus share of orders for each client type.
# Grouping by client_type and obtaining the total sum of sales
# (groupby sorts the groups by client_type label)
client_sales = df.groupby('client_type', as_index=False)[['total']].sum()
# Adding a 'perc_sales' column: each client type's share of overall sales, in percent
client_sales['perc_sales'] = (client_sales['total'] / client_sales['total'].sum()) * 100
# Labels of the first two client types, in the same row order as client_sales.
# NOTE(review): the messages below assume these are Retail then Wholesale -- confirm against the data.
retail_label, wholesale_label = client_sales['client_type'].iloc[:2]
# Separating percentages by client_type and rounding
retail_sales_per = client_sales['perc_sales'].iloc[0].round(2)
wholesale_sales_per = client_sales['perc_sales'].iloc[1].round(2)
# Obtaining the normalized count percentages of client_type
client_orders = df['client_type'].value_counts(normalize=True)
# value_counts is indexed by client_type label and ordered by frequency, not
# alphabetically like the groupby above. Look the order shares up by label so
# the sales and order figures always describe the same client type (integer
# positions on a label-indexed Series are also deprecated in recent pandas).
retail_orders_per = (client_orders.loc[retail_label] * 100).round(2)
wholesale_orders_per = (client_orders.loc[wholesale_label] * 100).round(2)
# Printing the results
print('Retail accounts for: {}% of total sales and {}% of total orders'.format(
    retail_sales_per, retail_orders_per))
print('Wholesale accounts for: {}% of total sales and {}% of total orders'.format(
    wholesale_sales_per, wholesale_orders_per))
Retail accounts for: 44.78% of total sales and 77.5% of total orders
Wholesale accounts for: 55.22% of total sales and 22.5% of total orders
# Number of orders (rows) per client type. The bars are already labelled on
# the x axis, so the colour legend would be redundant and is switched off.
fig = px.histogram(
    data_frame=df,
    x='client_type',
    color='client_type',
).update_layout(showlegend=False)
fig.show()
# Pie of 'total' split by client type; each slice carries its own
# percentage and label inside the wedge, so no legend is shown.
fig = px.pie(
    data_frame=df,
    names='client_type',
    values='total',
).update_traces(
    textinfo='percent+label',
    textposition='inside',
    showlegend=False,
)
fig.show()
# Horizontal grouped bars: 'total' aggregated along x for each product line
# on y, with one bar per client type.
fig = px.histogram(
    data_frame=df,
    y='product_line',
    x='total',
    color='client_type',
    # text_auto=True,
    title='Total sales by product line',
    barmode='group',
    labels=dict(
        client_type='Client',
        product_line='product line',
    ),
)
fig.show()
# Horizontal grouped bars: 'quantity' aggregated along x for each product
# line on y, with one bar per client type.
fig = px.histogram(
    data_frame=df,
    y='product_line',
    x='quantity',
    color='client_type',
    # text_auto=True,
    title='Sales volume by product line',
    barmode='group',
    labels=dict(
        client_type='Client',
        product_line='product',
    ),
)
fig.show()
# One panel per warehouse (facet_col); within each panel, 'total' aggregated
# per product line with the client types drawn side by side.
fig = px.histogram(
    data_frame=df,
    x='product_line',
    y='total',
    color='client_type',
    facet_col='warehouse',
    title='Warehouse total sales by product line',
    barmode='group',
)
fig.show()
# Every sale plotted as a point (date vs. total), coloured by client type.
# Points are semi-transparent so overlapping sales remain visible; a
# histogram along the date axis is attached as a marginal plot.
fig = px.scatter(
    data_frame=df,
    x='date',
    y='total',
    color='client_type',
    title='Client sales by date',
    labels=dict(client_type='client'),
    opacity=0.5,
    marginal_x='histogram',
)
fig.show()