import pandas as pd
import numpy as np
import datetime as dt
# Load the raw supermarket sales sheet into a DataFrame.
df = pd.read_csv(filepath_or_buffer='supermarket_sales - Sheet1.csv')

# Exploratory checks. The bare expressions only display a result in an
# interactive session (notebook/REPL); df.info() prints its summary itself.
df.head()         # first rows
df.shape          # (rows, columns)
df.isna().sum()   # missing-value count per column
df.describe()     # summary statistics
df.info()         # column dtypes and non-null counts
# Output of df.info() captured from the notebook run:
# <class 'pandas.core.frame.DataFrame'>
# RangeIndex: 1000 entries, 0 to 999
# Data columns (total 17 columns):
# # Column Non-Null Count Dtype
# --- ------ -------------- -----
# 0 Invoice ID 1000 non-null object
# 1 Branch 1000 non-null object
# 2 City 1000 non-null object
# 3 Customer type 1000 non-null object
# 4 Gender 1000 non-null object
# 5 Product line 1000 non-null object
# 6 Unit price 1000 non-null float64
# 7 Quantity 1000 non-null int64
# 8 Tax 5% 1000 non-null float64
# 9 Total 1000 non-null float64
# 10 Date 1000 non-null object
# 11 Time 1000 non-null object
# 12 Payment 1000 non-null object
# 13 cogs 1000 non-null float64
# 14 gross margin percentage 1000 non-null float64
# 15 gross income 1000 non-null float64
# 16 Rating 1000 non-null float64
# dtypes: float64(7), int64(1), object(9)
# memory usage: 132.9+ KB
# Parse the 'Date' column into datetime values, then print the column summary
# again so the dtype change can be checked.
df['Date'] = df['Date'].pipe(pd.to_datetime)
df.info()
# Output of df.info() after converting 'Date', captured from the notebook run:
# <class 'pandas.core.frame.DataFrame'>
# RangeIndex: 1000 entries, 0 to 999
# Data columns (total 17 columns):
# # Column Non-Null Count Dtype
# --- ------ -------------- -----
# 0 Invoice ID 1000 non-null object
# 1 Branch 1000 non-null object
# 2 City 1000 non-null object
# 3 Customer type 1000 non-null object
# 4 Gender 1000 non-null object
# 5 Product line 1000 non-null object
# 6 Unit price 1000 non-null float64
# 7 Quantity 1000 non-null int64
# 8 Tax 5% 1000 non-null float64
# 9 Total 1000 non-null float64
# 10 Date 1000 non-null datetime64[ns]
# 11 Time 1000 non-null object
# 12 Payment 1000 non-null object
# 13 cogs 1000 non-null float64
# 14 gross margin percentage 1000 non-null float64
# 15 gross income 1000 non-null float64
# 16 Rating 1000 non-null float64
# dtypes: datetime64[ns](1), float64(7), int64(1), object(8)
# memory usage: 132.9+ KB
# Add an 'Order Date' column holding one consecutive calendar day per row,
# starting 2018-01-01. The range is sized from len(df) rather than a fixed end
# date: the previous end of 9/26/2020 yields exactly 1000 days, so assignment
# would raise a length-mismatch error for any other row count.
df['Order Date'] = pd.date_range(start='1/1/2018', periods=len(df), freq='D')
df.head()

# DataFrame.replace returns a new DataFrame and leaves the original untouched,
# so the result must be bound back to df. The earlier bare calls discarded
# their results and the renames never reached the exported file.
# All renames are applied in one pass; the duplicate 'Yangon' entry is dropped.
city_renames = {
    'Yangon': 'Lagos',
    'Naypyitaw': 'Abuja',
    'Mandalay': 'Port Harcourt',
}
df = df.replace(to_replace=city_renames)

# Write the cleaned frame to an Excel workbook.
df.to_excel('Supermart_Cleaned.xlsx')