import pandas as pd
import matplotlib.pyplot as plt
# Load the bakery sales export into a DataFrame. The path is relative, so the
# CSV has to be in the current working directory.
df = pd.read_csv(filepath_or_buffer='Bakery Sales.csv')
# Bare expression: has no effect when run as a plain script.
df
datetime (object)
2019-11-09 11:36: 0.3%
2360 others: 90.9%
Missing: 8.8%
day of week (object)
Sun: 20.9%
6 others: 70.3%
Missing: 8.8%
0
2019-07-11 15:35
Thur
1
2019-07-11 16:10
Thur
2
2019-07-12 11:49
Fri
3
2019-07-13 13:19
Sat
4
2019-07-13 13:22
Sat
5
2019-07-13 14:54
Sat
6
2019-07-13 15:08
Sat
7
2019-07-13 15:09
Sat
8
2019-07-13 15:23
Sat
9
2019-07-13 16:32
Sat
# Order the rows by 'total', largest first, then print the five largest
# totals together with their original row labels.
df_order_by_total = df.sort_values(by='total', ascending=False)
x = df_order_by_total['total'].head(5)
print(x)
89 1293000.0
1444 116500.0
1326 91300.0
1495 77100.0
87 73200.0
Name: total, dtype: float64
# Histogram of 'total' with default binning. After the descending sort the
# first entry is the single largest total; it is skipped before plotting.
df_order_by_total = df.sort_values(by='total', ascending=False)
x = df_order_by_total['total'].iloc[1:]
plt.hist(x)
plt.show()
# Same histogram as before (largest total skipped), but with explicit bin
# edges running from 12000 to 29500 in steps of 2500.
df_order_by_total = df.sort_values(by='total', ascending=False)
x = df_order_by_total['total'].iloc[1:]
bins = list(range(12000, 30000, 2500))
plt.hist(x, bins=bins)
plt.show()
# Per-weekday summaries of 'total', each ordered from largest to smallest.
# The results are not assigned to anything.

# Average total per weekday.
(
    df.groupby(by='day of week')['total']
    .mean()
    .sort_values(ascending=False)
)
# Number of non-missing totals per weekday.
(
    df.groupby(by='day of week')['total']
    .count()
    .sort_values(ascending=False)
)
# Summed total per weekday.
(
    df.groupby(by='day of week')['total']
    .sum()
    .sort_values(ascending=False)
)
# Number of non-missing totals per hour of day, busiest hour first.
#
# No visible statement creates an 'hour' column after the CSV is loaded, so a
# top-to-bottom run would fail on the groupby below. Derive the column here
# when it is absent; an existing 'hour' column is left untouched.
# Assumes 'datetime' strings look like 'YYYY-MM-DD HH:MM' with a zero-padded
# hour -- TODO confirm against the full dataset.
if 'hour' not in df.columns:
    df['hour'] = df['datetime'].str[11:13]
df.groupby('hour')['total'].count().sort_values(ascending=False)
# Extract the two-digit month from 'datetime' strings of the form
# 'YYYY-MM-DD HH:MM' (characters 5 and 6) and total the sales per month.
#
# The slice must cover both digits: taking only character 6 would merge
# months that share a last digit (e.g. '01' and '11' both become '1').
month = df['datetime'].str[5:7]
df['month'] = month
# Note: the grouping key is the month alone, so the same month from
# different years lands in one group.
df.groupby('month')['total'].sum().sort_values(ascending=False)
# The first four characters of each 'datetime' string are the year; store
# them as a new 'year' column and total the sales per year, largest first.
df['year'] = df['datetime'].str.slice(stop=4)
(
    df.groupby(by='year')['total']
    .sum()
    .sort_values(ascending=False)
)
# Show the current column labels.
print(df.columns)
# Build a view of the data without 'place' and the per-product columns.
# drop() returns a new frame and the result is not assigned, so `df` itself
# keeps all of its columns.
df.drop(
    columns=[
        'place', 'angbutter', 'plain bread',
        'jam', 'americano', 'croissant', 'caffe latte', 'tiramisu croissant',
        'cacao deep', 'pain au chocolat', 'almond croissant', 'croque monsieur',
        'mad garlic', 'milk tea', 'gateau chocolat', 'pandoro', 'cheese cake',
        'lemon ade', 'orange pound', 'wiener', 'vanila latte', 'berry ade',
        'tiramisu', 'merinque cookies',
    ],
)
Index(['datetime', 'day of week', 'total', 'place', 'angbutter', 'plain bread',
'jam', 'americano', 'croissant', 'caffe latte', 'tiramisu croissant',
'cacao deep', 'pain au chocolat', 'almond croissant', 'croque monsieur',
'mad garlic', 'milk tea', 'gateau chocolat', 'pandoro', 'cheese cake',
'lemon ade', 'orange pound', 'wiener', 'vanila latte', 'berry ade',
'tiramisu', 'merinque cookies', 'hour', 'month', 'year'],
dtype='object')
datetime (object)
2019-11-09 11:36: 0.3%
2360 others: 90.9%
Missing: 8.8%
day of week (object)
Sun: 20.9%
6 others: 70.3%
Missing: 8.8%
0
2019-07-11 15:35
Thur
1
2019-07-11 16:10
Thur
2
2019-07-12 11:49
Fri
3
2019-07-13 13:19
Sat
4
2019-07-13 13:22
Sat
5
2019-07-13 14:54
Sat
6
2019-07-13 15:08
Sat
7
2019-07-13 15:09
Sat
8
2019-07-13 15:23
Sat
9
2019-07-13 16:32
Sat