import pandas as pd
import numpy as np
# Load the automobile dataset from the remote CSV (needs network access).
automobile = pd.read_csv(
    filepath_or_buffer="https://raw.githubusercontent.com/dearbharat/pandas/main/automobile.csv",
)
# Preview the first five rows; a bare expression only renders in a notebook/REPL.
automobile.head(n=5)
# Company and price of the most expensive car. Every row whose price equals
# the maximum is kept, so ties are all reported.
automobilecp = automobile.loc[
    automobile["price"] == automobile["price"].max(),
    ["company", "price"],
]
print(automobilecp)

# Number of rows (cars) per company.
automobile['company'].value_counts()

# Cheapest car price per company. The column is already selected with
# ['price'], so min() takes no argument: the previous min("price") passed the
# string into the positional `numeric_only` parameter by accident.
automobile_least_price = automobile.groupby("company")['price'].min()
print(automobile_least_price)

# For each body style, how many cars have each cylinder count.
automobile.groupby("body-style")["num-of-cylinders"].value_counts()
import pandas as pd
import numpy as np
# Load only the first 1000 rows of the bank-marketing CSV (needs network access).
bank = pd.read_csv(
    filepath_or_buffer="https://raw.githubusercontent.com/dearbharat/pandas/main/bank_marketing.csv",
    nrows=1000,
)
# Peek at a single row to see the available columns.
bank.head(n=1)
# Notebook output: this chart is empty.
# The chart was probably not set up properly in the notebook.
# Notebook output: this chart is empty.
# The chart was probably not set up properly in the notebook.
# Distinct job categories present in the sample.
print(bank.job.unique())

# Mean age for every education x job combination.
# The string "mean" replaces np.mean: pandas 2.1+ deprecates passing NumPy
# reducers as aggfunc and emits a FutureWarning for them.
pd.crosstab(bank.education, bank.job, values=bank.age, aggfunc="mean")

# Mean call duration by (marital, housing) rows and job columns; combinations
# with no data are shown as 0 instead of NaN.
pd.pivot_table(
    bank,
    index=['marital', 'housing'],
    columns=['job'],
    values=['duration'],
    aggfunc="mean",
    fill_value=0,
)

# Summary statistics restricted to the numeric columns.
bank.describe(include=[np.number])
# Load only the first 1000 rows of the churn CSV (needs network access).
churn = pd.read_csv(
    filepath_or_buffer="https://raw.githubusercontent.com/dearbharat/pandas/main/churn.csv",
    nrows=1000,
)
# Preview the first five rows; a bare expression only renders in a notebook/REPL.
churn.head(n=5)
# describe(include=['category']) raised an error here, so the non-numeric
# columns are summarised by excluding numeric dtypes instead.
# NOTE(review): presumably the text columns are read as `object`, not the
# pandas `category` dtype, which leaves nothing for include=['category'] to
# select - confirm against churn.dtypes.
category = churn.describe(exclude=[np.number])
category

# All non-numeric columns; select_dtypes already returns the sub-frame, so
# there is no need to round-trip through the list of column names.
categorical_cols = churn.select_dtypes(exclude=[np.number])
categorical_cols.head()

# Customers per InternetService (rows) x StreamingMovies (columns).
# rownames labels the first argument and colnames the second; they were
# previously swapped, which mislabelled both axes of the table.
pd.crosstab(
    churn.InternetService,
    churn.StreamingMovies,
    values=churn.customerID,
    rownames=["Internet Service"],
    colnames=["Streaming Movies"],
    aggfunc=np.count_nonzero,
)
# Display the average of TotalCharges by each PaymentMethod for churning and non-churning customers.
# Average TotalCharges per payment method, split by churn status (long format).
# Grouping is by Churn and PaymentMethod; the earlier version grouped by
# TotalCharges itself and left out the aggregation name (`.TotalCharges.()`),
# which is a syntax error.
# NOTE(review): assumes TotalCharges is parsed as a numeric column - confirm,
# since mean() fails on text values.
churn.groupby(['Churn', 'PaymentMethod'])['TotalCharges'].mean().reset_index(name="average_Tottal")
import numpy as np
# Average TotalCharges with one row per payment method and one column per
# churn status (wide format).
# The string "mean" replaces np.mean: pandas 2.1+ deprecates passing NumPy
# reducers as aggfunc and emits a FutureWarning for them.
# NOTE(review): assumes TotalCharges is parsed as a numeric column - confirm.
churn.pivot_table(
    values="TotalCharges",
    index="PaymentMethod",
    columns="Churn",
    aggfunc="mean",
)
# Load at most 1000 rows of the IMDB CSV (needs network access).
IMDB = pd.read_csv(
    filepath_or_buffer="https://raw.githubusercontent.com/dearbharat/pandas/main/IMDB.csv",
    nrows=1000,
)
# Peek at a single row to see the available columns.
IMDB.head(n=1)
# Notebook output: this chart is empty.
# The chart was probably not set up properly in the notebook.
# Number of rows (films) per director.
IMDB["Director"].value_counts()

# Mean runtime per genre, rounded to two decimals; as_index=False keeps
# Genre as an ordinary column rather than the index.
gen = (
    IMDB[["Genre", "Runtime (Minutes)"]]
    .groupby("Genre", as_index=False)
    .mean()
    .round(2)
)
gen

# Top three films per year by Metascore: sort descending on both keys, then
# keep the first three rows of each Year group.
(
    IMDB.sort_values(by=['Year', 'Metascore'], ascending=[False, False])
    .groupby('Year')[["Year", "Metascore", "Title", "Director", "Revenue (Millions)"]]
    .head(3)
)

# Per-director count of distinct titles and summed revenue, with the
# highest-revenue directors first.
agg_df = (
    IMDB.groupby("Director")
    .agg(
        num_moives=pd.NamedAgg(column="Title", aggfunc="nunique"),
        Total_revenue=pd.NamedAgg(column="Revenue (Millions)", aggfunc="sum"),
    )
    .reset_index()
    .sort_values(by="Total_revenue", ascending=False)
)
agg_df[["Director", "num_moives"]]