# Importing the pandas and plotly modules
import pandas as pd
import plotly.express as px
# Read the sales data; the 'date' column is parsed into datetime values.
df = pd.read_csv('sales_data.csv', parse_dates=['date'])
# Quick inspection of the frame: first rows, column dtypes / non-null counts,
# and summary statistics for every column.
# NOTE: the return values of head() and describe() are not printed by this
# script; only info() writes its report itself.
df.head()
df.info()
try:
    # pandas 1.1-1.5: ask for datetime columns to be summarized numerically.
    df.describe(include='all', datetime_is_numeric=True)
except TypeError:
    # pandas >= 2.0 removed the `datetime_is_numeric` keyword (datetimes are
    # always summarized numerically there), so passing it raises TypeError.
    df.describe(include='all')
# What are the total sales for each payment method?
# One bar per payment method built from the 'total' column, colored by client type.
# NOTE: text_auto=True is left switched off here.
fig = px.histogram(
    df,
    x='payment',
    y='total',
    color='client_type',
    title='Total sales by payment method',
    labels={
        'client_type': 'Client',
        'payment': 'Payment method',
    },
)
fig.show()
# What is the average unit price for each product line?
# Mean 'unit_price' for every (product line, client type) pair, kept as
# ordinary columns (as_index=False) and rounded to two decimals.
price_groups = df.groupby(['product_line', 'client_type'], as_index=False)
avg_unit = price_groups[['unit_price']].mean()
avg_unit = avg_unit.round(2)

# Axis / legend titles shown instead of the raw column names.
unit_price_labels = {
    'client_type': 'Client',
    'product_line': 'Product line',
    'unit_price': 'Avg. unit price',
}
# Side-by-side bars per client type within each product line.
# NOTE: text_auto=True is left switched off here.
fig = px.bar(
    avg_unit,
    x='product_line',
    y='unit_price',
    color='client_type',
    barmode='group',
    title='Average unit price by product line',
    labels=unit_price_labels,
)
fig.show()
# Grouping by client_type and obtaining the total sum of sales
client_sales = df.groupby('client_type', as_index=False)[['total']].sum()
# Adding a 'perc_sales' column: each client type's share of all sales, in percent
client_sales['perc_sales'] = (client_sales['total'] / client_sales['total'].sum()) * 100
# Obtaining the normalized count percentages of client_type.
# value_counts() is indexed by client type and ordered by frequency, so it must
# be read by label, never by position.
client_orders = df.client_type.value_counts(normalize=True)
# Order share (in percent) for each row of client_sales, matched on the
# client_type label so sales and order figures always describe the same client.
orders_per = (client_sales['client_type'].map(client_orders) * 100).round(2)
# Separating percentages by client_type and rounding.
# groupby sorts its keys, so rows 0 and 1 are the first and second client type
# in sorted order -- assumed to be Retail and Wholesale; confirm against the data.
retail_sales_per = client_sales['perc_sales'].iloc[0].round(2)
wholesale_sales_per = client_sales['perc_sales'].iloc[1].round(2)
# Printing the results
print('Retail accounts for: {}% of total sales and {}% of total orders'.format(
    retail_sales_per, orders_per.iloc[0]))
print('Wholesale accounts for: {}% of total sales and {}% of total orders'.format(
    wholesale_sales_per, orders_per.iloc[1]))
# Number of rows per client type. The legend is hidden because the same column
# drives both the x axis and the bar color.
client_col = 'client_type'
fig = px.histogram(df, color=client_col, x=client_col)
fig.update_layout(showlegend=False)
fig.show()

# Pie of the 'total' column split by client type, with the percentage and
# label written inside each slice instead of in a legend.
pie_trace_style = {
    'textposition': 'inside',
    'textinfo': 'percent+label',
    'showlegend': False,
}
fig = px.pie(df, names=client_col, values='total')
fig.update_traces(**pie_trace_style)
fig.show()
# Total sales by product line: product lines on the y axis, the 'total' column
# on the x axis, with grouped bars per client type.
# NOTE: text_auto=True is left switched off here.
sales_by_line_labels = {
    'client_type': 'Client',
    'product_line': 'product line',
}
fig = px.histogram(
    df,
    y='product_line',
    x='total',
    color='client_type',
    barmode='group',
    title='Total sales by product line',
    labels=sales_by_line_labels,
)
fig.show()
# Sales volume by product line: same layout as a per-product-line bar chart,
# but built from the 'quantity' column, with grouped bars per client type.
# NOTE: text_auto=True is left switched off here.
volume_by_line_labels = {
    'client_type': 'Client',
    'product_line': 'product',
}
fig = px.histogram(
    df,
    y='product_line',
    x='quantity',
    color='client_type',
    barmode='group',
    title='Sales volume by product line',
    labels=volume_by_line_labels,
)
fig.show()
# Total sales by product line, drawn as one facet column per warehouse with
# grouped bars per client type.
fig = px.histogram(
    df,
    x='product_line',
    y='total',
    color='client_type',
    facet_col='warehouse',
    barmode='group',
    title='Warehouse total sales by product line',
)
fig.show()
# Each row's 'total' plotted against its date, colored by client type.
# Points are half-transparent, and a marginal histogram is attached along
# the x (date) axis.
date_scatter_options = {
    'x': 'date',
    'y': 'total',
    'color': 'client_type',
    'opacity': 0.5,
    'marginal_x': 'histogram',
    'title': 'Client sales by date',
    'labels': {'client_type': 'client'},
}
fig = px.scatter(df, **date_scatter_options)
fig.show()