import pandas as pd # load library
import matplotlib.pyplot as plt # visualize package
df = pd.read_csv("data.csv", index_col="order_id") # load data
print(f"Number of sneakers shops :{df['shop_id'].nunique()}")
print(f"Average order value (AOV) of thee sneaker shops is {round(df['order_amount'].sum()/len(df), 2)}")
df.head() # view first 5 entries of dataset
print(f"Order amount and total items has {round(df['order_amount'].corr(df['total_items']), 2)}% correlation")
print(f"98% of the order amount is below ${round(df['order_amount'].quantile(.98), 2)}")
fig, ax = plt.subplots(1, 2)
ax[0].boxplot(df['order_amount'], showfliers=False)
ax[0].set_title("No Outliers")
ax[0].get_xaxis().set_visible(False)
ax[1].boxplot(df['order_amount'], showfliers=True)
ax[1].set_title("Outliers")
ax[1].get_xaxis().set_visible(False)
plt.suptitle('Order Amount')
fig.tight_layout()
plt.show()
df['order_amount'].mode().tolist()
SELECT COUNT(*) as total_shipment_speedexpress
FROM Shippers as s
LEFT JOIN Orders as o
ON s.ShipperID = o.ShipperID
WHERE ShipperName = "Speedy Express"
SELECT e.LastName, COUNT(DISTINCT o.OrderID) as num_of_orders
FROM Employees as e
LEFT JOIN Orders as o
ON e.EmployeeID = o.EmployeeID
GROUP BY e.EmployeeID
ORDER BY num_of_orders DESC
LIMIT 1
SELECT p.ProductName, od.OrderID, SUM(od.Quantity) as Total_Quantity, o.CustomerID, c.Country
FROM Products as p
LEFT JOIN OrderDetails as od
ON p.ProductID = od.ProductID
LEFT JOIN Orders as o
ON od.OrderID = o.OrderID
LEFT JOIN Customers as c
ON o.CustomerID = c.CustomerID
WHERE c.Country = "Germany"
GROUP BY p.ProductName
ORDER BY Total_Quantity DESC
LIMIT 1