%matplotlib inline
!pip install seaborn
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
sns.set(style="darkgrid")
Collecting seaborn
Downloading seaborn-0.11.0-py3-none-any.whl (283 kB)
|████████████████████████████████| 283 kB 18.2 MB/s
Requirement already satisfied: matplotlib>=2.2 in /opt/venv/lib/python3.7/site-packages (from seaborn) (3.3.3)
Requirement already satisfied: scipy>=1.0 in /opt/venv/lib/python3.7/site-packages (from seaborn) (1.5.4)
Requirement already satisfied: numpy>=1.15 in /opt/venv/lib/python3.7/site-packages (from seaborn) (1.19.4)
Requirement already satisfied: pandas>=0.23 in /opt/venv/lib/python3.7/site-packages (from seaborn) (1.0.5)
Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.3 in /opt/venv/lib/python3.7/site-packages (from matplotlib>=2.2->seaborn) (2.4.7)
Requirement already satisfied: python-dateutil>=2.1 in /opt/venv/lib/python3.7/site-packages (from matplotlib>=2.2->seaborn) (2.8.1)
Requirement already satisfied: cycler>=0.10 in /opt/venv/lib/python3.7/site-packages (from matplotlib>=2.2->seaborn) (0.10.0)
Requirement already satisfied: kiwisolver>=1.0.1 in /opt/venv/lib/python3.7/site-packages (from matplotlib>=2.2->seaborn) (1.3.1)
Requirement already satisfied: pillow>=6.2.0 in /opt/venv/lib/python3.7/site-packages (from matplotlib>=2.2->seaborn) (8.0.1)
Requirement already satisfied: pytz>=2017.2 in /opt/venv/lib/python3.7/site-packages (from pandas>=0.23->seaborn) (2020.4)
Requirement already satisfied: six>=1.5 in /opt/venv/lib/python3.7/site-packages (from python-dateutil>=2.1->matplotlib>=2.2->seaborn) (1.15.0)
Installing collected packages: seaborn
Successfully installed seaborn-0.11.0
WARNING: You are using pip version 20.2.4; however, version 20.3.1 is available.
You should consider upgrading via the '/opt/venv/bin/python -m pip install --upgrade pip' command.
# Load the data set; the relative path is resolved against the kernel's
# working directory.
df = pd.read_csv('fortune500.csv')
# NOTE(review): redundant re-import — pandas is already imported as `pd`
# at the top of the notebook.
import pandas as pd
# Preview the first and last rows once each to sanity-check the load.
df.head()
df.tail()
# Give the columns short lowercase names. The assignment raises if the
# frame does not have exactly five columns.
df.columns = [
    'year',
    'rank',
    'company',
    'revenue',
    'profit',
]
# Row count, then per-column dtypes, of the renamed frame.
df.shape[0]
df.dtypes
# Boolean mask: True where the profit text contains any character other
# than a digit, '.' or '-'.
non_numeric_profit = df['profit'].str.contains('[^0-9.-]')
# A few of the offending rows.
df[non_numeric_profit].head()
# The distinct offending values, and how many rows carry one.
set(df.loc[non_numeric_profit, 'profit'])
len(df.loc[non_numeric_profit, 'profit'])
# Drop the rows flagged by the non-numeric mask. The explicit .copy() makes
# the result an independent frame, so the column assignment below cannot
# trigger pandas' SettingWithCopyWarning.
df = df.loc[~non_numeric_profit].copy()
# Convert the whole column in one vectorised call instead of element by
# element through .apply(); unparseable text still raises ValueError.
df['profit'] = pd.to_numeric(df['profit'])
# Confirm the reduced row count and that profit now has a numeric dtype.
len(df)
df.dtypes
# Group the year/revenue/profit columns by year and average each group.
group_by_year = df[['year', 'revenue', 'profit']].groupby('year')
avgs = group_by_year.mean()
# x axis: the index of the averaged frame (the years);
# first series: mean profit per year.
x, y1 = avgs.index, avgs['profit']
def plot(x, y, ax, title, y_label, x_label=None):
    """Draw a line of ``y`` against ``x`` on an existing axes object.

    Parameters
    ----------
    x, y : sequence-like
        Data handed straight through to ``ax.plot``.
    ax : object
        Axes-like object providing ``set_title``, ``set_ylabel``, ``plot``
        and ``margins`` (plus ``set_xlabel`` when ``x_label`` is given).
    title : str
        Text for the axes title.
    y_label : str
        Text for the y-axis label.
    x_label : str, optional
        Text for the x-axis label. When omitted (the default) the x-axis
        label is left untouched, matching the original behaviour.

    Returns
    -------
    None
        The function only mutates ``ax``.
    """
    ax.set_title(title)
    ax.set_ylabel(y_label)
    if x_label is not None:
        ax.set_xlabel(x_label)
    ax.plot(x, y)
    # Zero margins on both axes: no padding is added around the data limits.
    ax.margins(x=0, y=0)
# Figure 1: mean profit per year. The unit label is spelled the same way
# as on the revenue figure below ("millions").
fig, ax = plt.subplots()
plot(x, y1, ax, 'Increase in mean Fortune 500 company profits', 'Profit(millions)')
# Figure 2: mean revenue per year, on a fresh figure (fig/ax are rebound).
y2 = avgs.revenue
fig, ax = plt.subplots()
plot(x, y2, ax, 'Increase in mean revenue', 'Revenue(millions)')
# Display each intermediate result once. Nothing between these lines
# reassigns them, so repeating a display would show the same object again.
avgs
non_numeric_profit
y1
group_by_year
# NOTE(review): nothing visible in this notebook assigns `input`, so this
# presumably refers to the builtin (or the kernel's replacement for it).
# These lines look like leftover scratch — confirm and consider removing.
input
print(input)
123