import pandas as pd
import numpy as np
from datascience import *
# Import the dataset: read the census CSV (path is relative to the current
# working directory) into a pandas DataFrame used by every step below.
india_census_df = pd.read_csv(filepath_or_buffer="census_india_2011.csv")
# A bare name is echoed only in an interactive cell/REPL; when this file runs
# as a plain script the line has no visible effect.
india_census_df
# QUESTION 1: How can we find the shape of the dataset?
# `DataFrame.shape` is a (number_of_rows, number_of_columns) tuple.
india_census_df.shape
# QUESTION 3: Filter out the outliers from the "Population" column.
# Hint: a value is an outlier when it lies below Q1 - (1.5 * IQR) or above
# Q3 + (1.5 * IQR), where IQR = Q3 - Q1 and Q1/Q3 are the 25th/75th percentiles.
population_col = india_census_df["Population"]
# Series.quantile ignores missing values. np.percentile would instead return
# NaN as soon as one Population entry is missing, which turns both bounds into
# NaN and makes the filter below drop every row.
Q1 = population_col.quantile(0.25)
Q3 = population_col.quantile(0.75)
IQR = Q3 - Q1
low_bound = Q1 - (1.5 * IQR)
up_bound = Q3 + (1.5 * IQR)
# Keep the rows whose Population lies within [low_bound, up_bound] (both ends
# inclusive). Note that, despite its name, `out` holds the rows that are NOT
# outliers; rows with a missing Population never satisfy the test and are dropped.
out = india_census_df[population_col.between(low_bound, up_bound)]
# Bare expression: shows the filtered frame in an interactive cell/REPL only.
out
# QUESTION 4: Check to see if there are any duplicates in the "District name" column.
# Return a Boolean.
# Hint: check out the `Series.is_unique` property (it is an attribute, not a
# function call).
# Note the polarity: the result is True when every district name is distinct,
# i.e. when there are NO duplicates, and False when at least one name repeats.
india_census_df["District name"].is_unique