#dataset obtained from dataworld
import os
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import numpy as np
import pandas as pd
df = pd.read_excel('Personal Consumption Expenditures.xlsx')
pip install openpyxl
# First look at the loaded frame: first and last ten rows, null count per
# column, and the column labels. These are bare expressions, so their values
# are only shown when evaluated interactively.
df.head(10)
df.tail(10)
df.isnull().sum()
df.columns
# Parse 'Month' into pandas datetimes; the calendar columns derived further
# down are read from this column.
df['Month'] = pd.to_datetime(df['Month'])
# Column dtypes and non-null counts, followed by the (rows, columns) tuple.
df.info()
df.shape
# Print the distinct values and their count for each grouping column.
# unique() and nunique() are both called because nunique() skips missing
# values by default while unique() reports them.
# After the loop, x and y hold the 'Sub-Category' results.
for column in ('Category', 'Sub-Category'):
    x = df[column].unique()
    y = df[column].nunique()
    print(x)
    print('Number Of Unique:', y)
# Ten randomly chosen rows (a different selection on every run, since no
# random_state is passed).
df.sample(10)
# Summary statistics produced by pandas' describe() with default settings.
df.describe()
# One boolean per column: True where the column contains at least one null.
df.isnull().any()
# seaborn draws onto the current Axes, so each chart gets a fresh figure;
# without this the second bar plot is painted over the first.
# barplot uses seaborn's default estimator (the mean) for each group.

# 'Millions of Dollars' per Sub-Category.
plt.figure()
ax = sns.barplot(x='Sub-Category', y='Millions of Dollars', data=df)
# Rotate the category labels so they do not overlap.
ax.tick_params(axis='x', rotation=90)

# 'Millions of Dollars' per Category.
plt.figure()
ax = sns.barplot(x='Category', y='Millions of Dollars', data=df)
ax.tick_params(axis='x', rotation=90)
import datetime
# Build the DatetimeIndex for 'Month' once and derive every calendar column
# from it.
month_index = pd.DatetimeIndex(df['Month'])
df['Weekday_num'] = month_index.weekday      # Monday=0 ... Sunday=6
df['Weekday'] = month_index.day_name()       # e.g. 'Monday'
df['Year'] = month_index.year
df['Month name'] = month_index.month_name()  # e.g. 'January'
# Preview the frame with the new columns.
df.head(10)
# Every chart opens its own figure: seaborn draws onto the current Axes, so
# without a new figure the plots are layered on top of each other.

# Number of rows per Category.
plt.figure()
sns.countplot(y='Category', data=df)

# Wide canvas for the bar charts below. 'figure.figsize' is only read when a
# figure is created, so it must be set before plt.figure(); one call covers
# both charts.
sns.set(rc={'figure.figsize':(20,5)})

# 'Millions of Dollars' per Year. barplot shows seaborn's default estimator
# (the mean), not the yearly total; for totals use
# df.groupby('Year')['Millions of Dollars'].sum().plot(kind='bar').
plt.figure()
ax = sns.barplot(x='Year', y='Millions of Dollars', data=df)
# Rotate the tick labels so they stay readable.
ax.tick_params(axis='x', rotation=90)

# 'Millions of Dollars' per Sub-Category on the wide canvas.
plt.figure()
ax = sns.barplot(x='Sub-Category', y='Millions of Dollars', data=df)
ax.tick_params(axis='x', rotation=90)
# Rows whose Sub-Category is exactly 'Beer'.
is_beer = df['Sub-Category'] == 'Beer'
df.loc[is_beer]
# Distinct values of the derived 'Month name' column.
df['Month name'].unique()
# Number of distinct values in each column, restricted to rows with
# Year == 2021.
in_2021 = df['Year'] == 2021
x = df.loc[in_2021].nunique()