#dataset obtained from dataworld
import os
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import numpy as np
import pandas as pd
df = pd.read_excel('Personal Consumption Expenditures.xlsx')
pip install openpyxl
Requirement already satisfied: openpyxl in c:\programdata\anaconda3\lib\site-packages (3.0.7)
Note: you may need to restart the kernel to use updated packages.
Requirement already satisfied: et-xmlfile in c:\programdata\anaconda3\lib\site-packages (from openpyxl) (1.0.1)
# Look at both ends of the table.
df.head(n=10)
df.tail(n=10)
# Count of missing values in each column.
df.isna().sum()
df.columns
# Parse the 'Month' column into pandas datetimes so that calendar parts
# can be derived from it later on.
df['Month'] = pd.to_datetime(df['Month'])
# Column dtypes, non-null counts and memory footprint.
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 14900 entries, 0 to 14899
Data columns (total 4 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Category 14900 non-null object
1 Sub-Category 14900 non-null object
2 Month 14900 non-null datetime64[ns]
3 Millions of Dollars 14900 non-null int64
dtypes: datetime64[ns](1), int64(1), object(2)
memory usage: 465.8+ KB
# (rows, columns) of the loaded table.
df.shape
# Distinct values of 'Category' (x) and how many there are (y).
x, y = df['Category'].unique(), df['Category'].nunique()
print(x)
print('Number Of Unique:', y)
['Alcoholic beverages' 'Cereals and bakery products' 'Fats and oils'
'Fish and seafood' 'Fresh fruits and vegetables' 'Meats and poultry'
'Milk, dairy products, and eggs' 'Processed fruits and vegetables'
'Sugar and sweets' 'Coffee, tea, and other beverage materials'
'Mineral waters, soft drinks, and vegetable juices']
Number Of Unique: 11
# Distinct values of 'Sub-Category' (x) and how many there are (y).
x, y = df['Sub-Category'].unique(), df['Sub-Category'].nunique()
print(x)
print('Number Of Unique:', y)
['Beer' 'Spirits' 'Wine' 'Bakery products' 'Cereals' 'Fats and oils'
'Fish and seafood' 'Fruit (fresh)' 'Vegetables (fresh)' 'Beef and veal'
'Other meats' 'Pork' 'Poultry' 'Eggs' 'Fresh milk'
'Processed dairy products' 'Processed fruits and vegetables'
'Sugar and sweets' 'Coffee, tea, and other beverage materials'
'Mineral waters, soft drinks, and vegetable juices']
Number Of Unique: 20
# Ten randomly chosen rows as a spot check.
df.sample(n=10)
# Summary statistics for the numeric columns.
df.describe()
# True for every column that contains at least one missing value.
df.isna().any()
# seaborn draws onto the *current* matplotlib axes. Opening a new figure for
# each chart keeps the two bar plots from being painted on top of each other
# when the statements run back to back.

# Spend per sub-category (barplot aggregates with its default estimator).
plt.figure()
ax = sns.barplot(x='Sub-Category', y='Millions of Dollars', data=df)
# Labels are long, so turn them vertical to keep them readable.
ax.tick_params(axis='x', rotation=90)

# Same view at the top-level category, on its own figure.
plt.figure()
ax = sns.barplot(x='Category', y='Millions of Dollars', data=df)
ax.tick_params(axis='x', rotation=90)
import datetime
# Derive calendar parts from the 'Month' timestamps through the Series `.dt`
# accessor (relies on 'Month' already being datetime64, as converted above).
df['Weekday_num'] = df['Month'].dt.weekday
df['Weekday'] = df['Month'].dt.day_name()
df['Year'] = df['Month'].dt.year
df['Month name'] = df['Month'].dt.month_name()
#df.drop('Weekday_name', axis='columns', inplace=True)
# Confirm the new columns are present.
df.head(n=10)
# Number of rows recorded for each category. A fresh figure is opened first
# so the count plot is not drawn onto axes left over from an earlier chart.
plt.figure()
sns.countplot(y='Category', data=df)
#df.groupby(['Year'])['Millions of Dollars'].sum().plot(kind='bar')
# Wide canvas for the per-year chart. The figsize rc setting only applies to
# figures created afterwards, so open a new figure explicitly instead of
# drawing onto whatever axes is current.
sns.set(rc={'figure.figsize':(20,5)})
plt.figure()
ax = sns.barplot(x='Year', y='Millions of Dollars', data=df)
# Rotate the year labels so they do not overlap.
ax.tick_params(axis='x', rotation=90)
# Sub-category chart on the wide canvas. The figsize rc setting only applies
# to figures created afterwards, so open a new figure explicitly rather than
# reusing the current axes.
sns.set(rc={'figure.figsize':(20,5)})
plt.figure()
ax = sns.barplot(x='Sub-Category', y='Millions of Dollars', data=df)
# Labels are long, so turn them vertical to keep them readable.
ax.tick_params(axis='x', rotation=90)
# Rows belonging to the 'Beer' sub-category.
df[df['Sub-Category'].eq('Beer')]
# Month labels that occur in the data.
df['Month name'].unique()
# Number of distinct values in each column among the 2021 rows.
x = df[df['Year'].eq(2021)].nunique()