import numpy as np
import pandas as pd
from numpy.random import randn
# Fix the seed so the random frames are reproducible from run to run.
np.random.seed(101)

# 5x4 frame of standard-normal samples with labelled rows and columns.
df = pd.DataFrame(
    randn(5, 4),
    index=["a", "b", "c", "d", "e"],
    columns=["one", "two", "three", "four"],
)
df
# Result is discarded; the call is kept because it advances the global RNG state.
randn(5, 4)

# Select columns (a single column is also reachable as an attribute: df.one).
df[["one", "two"]]

# Create a new column from two existing ones.
df["new_column"] = df["one"] + df["two"]
df

# drop() returns a new frame; df itself still contains "new_column".
df.drop("new_column", axis=1)
df

# Pass inplace=True to remove the column from the original frame.
df.drop("new_column", axis=1, inplace=True)
df

# Element-wise comparison yields a boolean frame; indexing with it turns
# the entries that fail the condition into NaN.
df >= 0
df[df >= 0]

# A boolean Series built from one column filters whole rows.
df["one"] >= 0
df[df["one"] >= 0]

# Negate a condition with ~.
df[~(df["one"] >= 0)]

# NOTE: the notebook rendered no chart at this point. Its output read:
#   "This chart is empty"
#   "Chart was probably not set up properly in the notebook"
# That text was pasted into the script as bare lines (a SyntaxError), so it
# is preserved here as a comment instead.

# Combine conditions with & (each one wrapped in parentheses).
df[(df["one"] >= 0) & (df["four"] < 1)]

# Move the row labels into a regular column (returns a new frame).
df.reset_index()

# Add a column of new labels and use it as the index (returns a new frame;
# df keeps its original a..e index).
index = ["a1", "a2", "a3", "a4", "a5"]
df["index"] = index
df.set_index("index")
# Hierarchical (multi-level) row index.
outside = ['G1'] * 3 + ['G2'] * 3
inside = [1, 2, 3] * 2

# Pair every outer label with its inner label, then build the MultiIndex.
hier_index = pd.MultiIndex.from_tuples(list(zip(outside, inside)))
type(hier_index)
hier_index

# 6x2 frame of standard-normal samples indexed by the two levels.
df = pd.DataFrame(
    data=np.random.randn(6, 2),
    index=hier_index,
    columns=['A', 'B'],
)
df

# Row at outer label "G1", inner label 1.
df.loc[("G1", 1)]

# The levels start out unnamed; give them names so they can be addressed.
df.index.names
df.index.names = ["Grupos", "numeros"]
df.index

# Cross-section: every row whose "numeros" level equals 1.
df.xs(1, level="numeros")
# Missing data: start from uniform samples in [0, 1) and blank out
# every value that is <= 0.5.
df = pd.DataFrame(np.random.rand(5, 4))
df = df.mask(df <= 0.5)
df
df

# Keep only the rows that have at least 3 non-NaN values.
df.dropna(thresh=3)

# Replace every NaN with 0 (returns a new frame).
df.fillna(value=0)

# Column means.
df.mean()
# Sample sales records used to demonstrate groupby aggregations.
data = {
    'Company': ['GOOG', 'GOOG', 'MSFT', 'MSFT', 'FB', 'FB'],
    'Person': ['Sam', 'Charlie', 'Amy', 'Vanessa', 'Carl', 'Sarah'],
    'Sales': [200, 120, 340, 124, 243, 350],
}
df = pd.DataFrame(data)
df

# Group the rows by company; the aggregations below run once per group.
df_company = df.groupby("Company")

# "Person" is a string column. pandas >= 2.0 raises TypeError when asked for
# its mean, so restrict the aggregation to numeric columns explicitly.
df_company.mean(numeric_only=True)
df_company.sum()

# One randomly chosen row per company.
df_company.sample()

# Totals for a single company.
df.groupby("Company").sum().loc["FB"]

# Number of non-null entries per column in each group.
df.groupby("Company").count()

# Summary statistics per group, transposed so companies become columns.
df.groupby("Company").describe().transpose()
# Load the best-sellers CSV and aggregate it per author.
df_books = pd.read_csv(
    "/work/bestsellers-with-categories_e591527f-ae45-4fa5-b0d1-d50142128fa6.csv"
)
df_books

# One grouping object reused for every per-author aggregation below.
books_by_author = df_books.groupby("Author")

# Per-author totals.
books_by_author.sum()

# Minimum and maximum of every column per author.
books_by_author.agg(["min", "max"])

# A different aggregation per column.
books_by_author.agg({"Year": "mean", "User Rating": "min"})


def mean_year_offset(years):
    """Return the mean of *years* minus 2000."""
    return years.mean() - 2000


# Custom aggregation on the "Year" column.
books_by_author.agg({"Year": mean_year_offset})
# Read a JSON document straight from a URL into a DataFrame.
url = "https://static.platzi.com/media/public/uploads/hpcharactersdataraw_3d934e85-dfa4-42ec-8520-fadfbecae574.json"
hp_characters = pd.read_json(url)
hp_characters
# Two frames that share a "key" column.
left = pd.DataFrame(
    {
        'key': ['K0', 'K1', 'K2', 'K3'],
        'A': ['A0', 'A1', 'A2', 'A3'],
        'B': ['B0', 'B1', 'B2', 'B3'],
    }
)
right = pd.DataFrame(
    {
        'key': ['K0', 'K1', 'K2', 'K3'],
        'C': ['C0', 'C1', 'C2', 'C3'],
        'D': ['D0', 'D1', 'D2', 'D3'],
    }
)
# Show both inputs, separated by a blank line.
print(left, right, sep=" \n\n ")

# Inner merge: keep the rows whose "key" appears in both frames.
left.merge(right, how="inner", on="key")
# Two frames that share the same row labels instead of a key column.
row_labels = ['K0', 'K1', 'K2', 'K3']
left = pd.DataFrame(
    {
        'A': ['A0', 'A1', 'A2', 'A3'],
        'B': ['B0', 'B1', 'B2', 'B3'],
    },
    index=list(row_labels),
)
right = pd.DataFrame(
    {
        'C': ['C0', 'C1', 'C2', 'C3'],
        'D': ['D0', 'D1', 'D2', 'D3'],
    },
    index=list(row_labels),
)
# Show both inputs, separated by a blank line.
print(left, right, sep=" \n\n ")

# join() aligns the two frames on their index.
left.join(other=right)
# Small frame used to demonstrate common column operations.
df = pd.DataFrame(
    {
        'col1': [1, 2, 3, 4],
        'col2': [444, 555, 666, 444],
        'col3': ['abc', 'def', 'ghi', 'xyz'],
    }
)
df.head()

# Distinct values of a column.
df["col2"].unique()

# Number of distinct values.
df["col2"].nunique()

# Number of occurrences of each value.
df["col2"].value_counts()
df

# Apply a function to every element of a column.
df["col2"].apply(lambda value: value * 2)

# Drop a column (returns a new frame; df is unchanged).
df.drop(columns="col1")

# Sort the rows by the values of a column.
df.sort_values(by='col2')
# Long-format records used to demonstrate pivot tables.
data = {
    'A': ['foo', 'foo', 'foo', 'bar', 'bar', 'bar'],
    'B': ['one', 'one', 'two', 'two', 'one', 'one'],
    'C': ['x', 'y', 'x', 'y', 'x', 'y'],
    'D': [1, 3, 2, 5, 4, 1],
}
df = pd.DataFrame(data)
df

# Aggregate "D" for each value of "A", then transpose the result.
df.pivot_table(values="D", columns="A").transpose()

# Aggregate "D" for each (A, B) pair, spread over the values of "C",
# then transpose the result.
df.pivot_table(values="D", index=["A", "B"], columns="C").transpose()