03 P01 G05 Data Manipulation

import numpy as np
import pandas as pd

# Load the automobile dataset straight from the course repository.
automobile = pd.read_csv(
    "https://raw.githubusercontent.com/dearbharat/pandas/main/automobile.csv"
)

# Preview the first five rows.
automobile.head(5)

# Company and price of the row(s) whose price equals the overall maximum.
# A boolean mask (rather than idxmax) keeps every row tied for the top price.
is_top_price = automobile["price"] == automobile["price"].max()
automobilecp = automobile[is_top_price][["company", "price"]]
print(automobilecp)

# Count how many rows each company has in the dataset.
company_column = automobile["company"]
company_column.value_counts()

# Lowest price per company.
# The selected column is already "price", so min() takes no column argument;
# a positional string would be bound to the `numeric_only` parameter instead.
automobile_least_price = automobile.groupby("company")["price"].min()
print(automobile_least_price)

# For each body style, count the occurrences of every num-of-cylinders value.
cylinders_by_body_style = automobile.groupby("body-style")["num-of-cylinders"]
cylinders_by_body_style.value_counts()

import numpy as np
import pandas as pd

# Load only the first 1000 rows of the bank marketing dataset.
bank = pd.read_csv(
    "https://raw.githubusercontent.com/dearbharat/pandas/main/bank_marketing.csv",
    nrows=1000,
)

# Preview a single row to inspect the available columns.
bank.head(1)

This chart is empty

Chart was probably not set up properly in the notebook

This chart is empty

Chart was probably not set up properly in the notebook

# List the distinct values found in the job column.
distinct_jobs = bank["job"].unique()
print(distinct_jobs)

# Mean age for every education (rows) x job (columns) combination.
# The aggregation is named by string: recent pandas versions emit a
# FutureWarning when the NumPy callable np.mean is passed instead.
pd.crosstab(
    bank["education"],
    bank["job"],
    values=bank["age"],
    aggfunc="mean",
)

# Mean duration per (marital, housing) row group and job column.
# Combinations with no data are shown as 0 via fill_value.
# "mean" is passed as a string to avoid the pandas FutureWarning raised for
# the NumPy callable np.mean.
pd.pivot_table(
    bank,
    index=["marital", "housing"],
    columns=["job"],
    values=["duration"],
    aggfunc="mean",
    fill_value=0,
)

# Summary statistics restricted to the numeric columns.
numeric_dtypes = [np.number]
bank.describe(include=numeric_dtypes)

# Load only the first 1000 rows of the customer churn dataset.
churn = pd.read_csv(
    "https://raw.githubusercontent.com/dearbharat/pandas/main/churn.csv",
    nrows=1000,
)

# Preview the first rows (head() defaults to five).
churn.head()

# describe(include=['category']) raised an error on this frame, so the
# non-numeric columns are summarised with exclude=[np.number] instead.
# NOTE(review): presumably the text columns are object dtype rather than
# pandas "category" dtype -- confirm with churn.dtypes.
category = churn.describe(exclude=[np.number])
category

# Keep only the non-numeric columns of churn.
# select_dtypes already returns the sub-frame, so there is no need to extract
# its column names and index churn with them a second time.
categorical_cols = churn.select_dtypes(exclude=[np.number])
categorical_cols.head()

# Number of customers for each InternetService (rows) x StreamingMovies
# (columns) combination, counted through the customerID values.
# The axis labels follow the argument order: the first argument supplies the
# rows, so it is labelled "Internet Service"; the second supplies the columns.
pd.crosstab(
    churn["InternetService"],
    churn["StreamingMovies"],
    values=churn["customerID"],
    rownames=["Internet Service"],
    colnames=["Streaming Movies"],
    aggfunc=np.count_nonzero,
)

Display the average of TotalCharges for each PaymentMethod, separately for churning and non-churning customers.

# Average TotalCharges for every (Churn, PaymentMethod) pair, returned as a
# flat frame with the mean stored in the "average_Tottal" column.
# Grouping is by Churn and PaymentMethod; grouping by TotalCharges itself would
# put every distinct charge in its own group and make the mean meaningless.
# NOTE(review): assumes TotalCharges is numeric -- if it was read as text,
# convert it with pd.to_numeric(..., errors="coerce") before aggregating.
(
    churn.groupby(["Churn", "PaymentMethod"])["TotalCharges"]
    .mean()
    .reset_index(name="average_Tottal")
)

import numpy as np

# Mean TotalCharges with one row per PaymentMethod and one column per Churn
# value. "mean" is passed as a string to avoid the pandas FutureWarning raised
# for the NumPy callable np.mean.
churn.pivot_table(
    values="TotalCharges",
    index="PaymentMethod",
    columns="Churn",
    aggfunc="mean",
)

# Load only the first 1000 rows of the IMDB dataset.
IMDB = pd.read_csv(
    "https://raw.githubusercontent.com/dearbharat/pandas/main/IMDB.csv",
    nrows=1000,
)

# Preview a single row to inspect the available columns.
IMDB.head(1)

This chart is empty

Chart was probably not set up properly in the notebook

# Count how many rows each director has in the dataset.
director_column = IMDB["Director"]
director_column.value_counts()

# Average runtime per genre, rounded to two decimals.
# as_index=False keeps Genre as a regular column instead of the index.
genre_runtime = IMDB[["Genre", "Runtime (Minutes)"]]
gen = genre_runtime.groupby("Genre", as_index=False).mean().round(2)
gen

# First three rows of each year after sorting by Year and Metascore, both
# descending -- i.e. the three highest-Metascore rows per year.
ranked = IMDB.sort_values(["Year", "Metascore"], ascending=False)
display_columns = ["Year", "Metascore", "Title", "Director", "Revenue (Millions)"]
ranked.groupby("Year")[display_columns].head(3)

# One row per director: number of distinct titles and summed revenue,
# ordered from the highest to the lowest total revenue.
agg_df = (
    IMDB.groupby("Director")
    .agg(
        num_moives=("Title", "nunique"),
        Total_revenue=("Revenue (Millions)", "sum"),
    )
    .reset_index()
    .sort_values(by="Total_revenue", ascending=False)
)

# Show only the director and the movie count, keeping the revenue ordering.
agg_df[["Director", "num_moives"]]