!pip install pandas
Requirement already satisfied: pandas in /opt/venv/lib/python3.7/site-packages (1.0.5)
Requirement already satisfied: python-dateutil>=2.6.1 in /opt/venv/lib/python3.7/site-packages (from pandas) (2.8.1)
Requirement already satisfied: numpy>=1.13.3 in /opt/venv/lib/python3.7/site-packages (from pandas) (1.19.4)
Requirement already satisfied: pytz>=2017.2 in /opt/venv/lib/python3.7/site-packages (from pandas) (2020.4)
Requirement already satisfied: six>=1.5 in /opt/venv/lib/python3.7/site-packages (from python-dateutil>=2.6.1->pandas) (1.15.0)
import pandas as pd
df = pd.read_excel("SampleData.xlsx")
!pip install xlrd
Requirement already satisfied: xlrd in /opt/venv/lib/python3.7/site-packages (1.2.0)
df = pd.read_excel("SampleData.xlsx",sheet_name="SalesOrders")
df
df.describe()
df = df.sort_values("Total")
df
df = df.drop_duplicates()
df
df.drop_duplicates(subset = ['Rep'],keep='first')
df['new_total'] = df["Units"] * df["Unit Cost"]
df
df.groupby(["Rep"])
aggregate = df.groupby(["Rep"]).sum()
aggregate
aggregate.sort_values("Total")
aggregate[aggregate["Total"] > 2000]
aggregate["Total"] > 2000
df.pivot_table(index=['Rep'], aggfunc=sum)
df
df.iloc[1,2] = None # this selects the second row and third column. Python is 0 indexed!
df.iloc[3,:] = None # this sets the whole 4th row to Null
df
df.isna().sum()
df.fillna(method="ffill")
df.fillna(0)
!pip install openpyxl==3.0.5
Requirement already satisfied: openpyxl==3.0.5 in /opt/venv/lib/python3.7/site-packages (3.0.5)
Requirement already satisfied: jdcal in /opt/venv/lib/python3.7/site-packages (from openpyxl==3.0.5) (1.4.1)
Requirement already satisfied: et-xmlfile in /opt/venv/lib/python3.7/site-packages (from openpyxl==3.0.5) (1.0.1)
df.to_excel("newdata.xlsx") #save the data in excel format