Summer 2022 Data Science Intern Challenge
Question 1:
Question 2:
Exploratory Data Analysis for Question 1
import pandas as pd
import numpy as np
# Load the challenge data set from its CSV export into a DataFrame.
df = pd.read_csv(
    "/work/Summer-2022-Data-Science-Intern-Challenge-/2019 Winter Data Science Intern Challenge Data Set - Sheet1.csv"
)
# Bare expression: has no effect in a plain script; presumably kept from a
# notebook cell, where it renders the frame.
df
# Summary statistics (count, mean, std, min, quartiles, max) for the two
# columns the rest of the analysis is built on.
df.loc[:, 'order_amount'].describe()
df.loc[:, 'total_items'].describe()
# Per-shop totals of order amount and item count.
# The columns are selected with a list (double brackets): indexing a GroupBy
# with a bare tuple of column names was deprecated in pandas 1.0 and raises
# in pandas 2.x.
aov = df.groupby(['shop_id'])[['order_amount', 'total_items']].sum()
# Ratio of each shop's summed order amount to its summed item count.
# NOTE(review): this divides by items, not by number of orders, so despite the
# column name it is an amount-per-item figure -- confirm that is the intent.
aov['average_order_value'] = aov['order_amount'] / aov['total_items']
# Bare expression: only displays when run interactively / as a notebook cell.
aov
# Distribution summary of the per-shop ratio across all shops.
aov['average_order_value'].describe()
# Lift the row limit on DataFrame display so the full sorted table is shown.
# The fully-qualified key is required: pandas resolves option names by pattern
# match, and the bare "max_rows" also matches "styler.render.max_rows" on
# pandas >= 1.4, which raises OptionError for the ambiguous pattern.
pd.set_option("display.max_rows", None)
# Rank shops from the highest to the lowest per-shop ratio (returns a new
# frame; `aov` itself is not reordered).
aov.sort_values('average_order_value', ascending=False)
# Median of the per-shop ratio.
aov.loc[:, 'average_order_value'].median()
# Mode(s) of the per-shop ratio; Series.mode returns every value tied for
# most frequent.
aov.loc[:, 'average_order_value'].mode()