#dataset downloaded from ourworldindata.org
import os
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import numpy as np
import pandas as pd
# Load the opportunities CSV (relative path, so it is resolved against the
# current working directory) into the DataFrame used by the rest of the script.
source_csv = 'Total Oppurtunity.csv'
df = pd.read_csv(source_csv)
# Quick look at the first rows, then print per-column dtypes and non-null counts.
df.head()
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 160 entries, 0 to 159
Data columns (total 16 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Account Name 160 non-null object
1 Group Opportunities 160 non-null object
2 Billing Country 158 non-null object
3 Close Date 160 non-null object
4 Full Name 160 non-null object
5 Industry 160 non-null object
6 Name (Product) 160 non-null object
7 Closed 160 non-null bool
8 Opportunity ID 160 non-null object
9 Won 160 non-null bool
10 # of Losses 160 non-null int64
11 # of Open 160 non-null int64
12 # of Wins 160 non-null int64
13 Amount 160 non-null object
14 Expected Amount 160 non-null object
15 Sales 160 non-null int64
dtypes: bool(2), int64(4), object(10)
memory usage: 17.9+ KB
import datetime as dt
# Convert the 'Close Date' column from strings (object dtype) to pandas
# datetimes, letting pandas infer the date format.
# NOTE(review): the source date format (day-first vs month-first) is not
# visible here -- confirm the inferred parse matches the raw data.
close_dates = pd.to_datetime(df['Close Date'])
df['Close Date'] = close_dates
# Re-print the schema to confirm the column is now datetime64[ns].
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 160 entries, 0 to 159
Data columns (total 16 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Account Name 160 non-null object
1 Group Opportunities 160 non-null object
2 Billing Country 158 non-null object
3 Close Date 160 non-null datetime64[ns]
4 Full Name 160 non-null object
5 Industry 160 non-null object
6 Name (Product) 160 non-null object
7 Closed 160 non-null bool
8 Opportunity ID 160 non-null object
9 Won 160 non-null bool
10 # of Losses 160 non-null int64
11 # of Open 160 non-null int64
12 # of Wins 160 non-null int64
13 Amount 160 non-null object
14 Expected Amount 160 non-null object
15 Sales 160 non-null int64
dtypes: bool(2), datetime64[ns](1), int64(4), object(9)
memory usage: 17.9+ KB
# Count missing values per column before cleaning.
df.isnull().sum()
# Remove every row that has at least one missing value; this mutates df.
df.dropna(inplace=True)
# Re-count missing values to verify the rows were removed.
df.isnull().sum()
df.describe()
df.sample(10)
# Columns excluded from the cleaned export. Defined once so the preview and
# the final frame cannot drift apart.
columns_to_drop = [
    'Closed',
    '# of Losses',
    '# of Open',
    '# of Wins',
    'Sales',
    'Won',
    'Group Opportunities',
]
# Preview of the reduced frame; drop() returns a copy, df itself is unchanged.
df.drop(columns=columns_to_drop)
#df['Account Name'].unique()
# Frequency of each distinct full row (all columns) in df.
df.value_counts()
df_final = df.drop(columns=columns_to_drop)
df_final
# Export the reduced frame. The original wrote `df` here, which left all of
# the dropped columns in the "Cleaned" workbook and never used df_final.
df_final.to_excel('Total Oppurtunity Cleaned.xlsx')