# ML ZoomCamp Homework Session 1

import os
import urllib.request

import numpy as np
import pandas as pd

# Report which NumPy and Pandas releases this session runs against.
numpy_version = np.__version__
pandas_version = pd.__version__
print(f"[ANSWER-1] The version of NumPy is: {numpy_version}")
print(f"[ANSWER-2] The version of Pandas is: {pandas_version}")

# Fetch the car-price dataset once and cache it as "data.csv" in the working
# directory; later runs reuse the local copy instead of downloading again.
# The download goes through urllib rather than a `!wget` shell escape so the
# file is valid plain Python and not only runnable inside IPython/Jupyter.
if not os.path.isfile("data.csv"):
    urllib.request.urlretrieve(
        "https://raw.githubusercontent.com/alexeygrigorev/mlbookcamp-code/"
        "master/chapter-02-car-price/data.csv",
        "data.csv",
    )

df = pd.read_csv("data.csv")
# Preview of the first rows: rendered only when this is the last expression of
# a notebook cell, and has no visible effect in a plain script.
df.head()

# Check available car Make
df['Make'].unique()

# Normalizing car Make: lower-case the names and turn spaces and hyphens into
# underscores (so the values can be matched with keys such as 'bmw' or
# 'rolls_royce'). `regex=False` pins both replacements to literal matching,
# independent of the default `regex` value of the installed pandas version.
df['Make'] = (
    df['Make']
    .str.lower()
    .str.replace(" ", "_", regex=False)
    .str.replace("-", "_", regex=False)
)
df['Make'].unique()

# Computing BMW cars average price
# Rows are selected by the lower-case make name 'bmw'.
bmw_prices = df.loc[df['Make'] == 'bmw', 'MSRP']
bmw_avg_price = bmw_prices.mean()
print(f"[ANSWER-3] Average price of BMW cars is {bmw_avg_price}")

# Count the missing 'Engine HP' entries among cars with Year >= 2015.
recent_engine_hp = df.loc[df['Year'] >= 2015, 'Engine HP']
missing_hp_count = recent_engine_hp.isnull().sum()
print(f"[ANSWER-4] There are {missing_hp_count} missing values in 'Engine HP'")

# Compare the rounded mean of 'Engine HP' before and after filling its missing
# values, and report whether the rounded mean changed.
mean_hp_before = round(df['Engine HP'].mean())
# NOTE(review): the fill value is the *rounded* mean, not the exact one;
# confirm that this is the intended imputation value.
df['Engine HP Filled'] = df['Engine HP'].fillna(mean_hp_before)
mean_hp_after = round(df['Engine HP Filled'].mean())

means_equal = mean_hp_before == mean_hp_after
print(f"Mean HP before ({mean_hp_before}) and Mean HP after ({mean_hp_after}) are equal?: {means_equal}")
# The answer line is derived from the comparison above instead of being
# hard-coded, so it cannot contradict the computed values.
if means_equal:
    print("[ANSWER-5] No, the HP mean value has not changed")
else:
    print("[ANSWER-5] Yes, the HP mean value has changed")

# Build the feature matrix X from the distinct Rolls-Royce rows, then compute
# the inverse of X^T X and report the sum of its elements.
rr_df = df[df['Make'] == 'rolls_royce']
# drop_duplicates() is used in its returning form: calling it with
# inplace=True on a frame obtained by selecting from `df` can trigger pandas'
# SettingWithCopyWarning.
rr_df = rr_df[['Engine HP', 'Engine Cylinders', 'highway MPG']].drop_duplicates()

X = rr_df.values
XTX = X.T.dot(X)
XTX_inv = np.linalg.inv(XTX)
print(f"[ANSWER-6] The sum of all elements is: {np.sum(XTX_inv)}")

# Compute the weights w = (X^T X)^-1 X^T y for the fixed target vector y,
# reusing X and XTX_inv computed earlier in the script.
y = np.array([1000, 1100, 900, 1200, 1000, 850, 1300])
XTXiXT = np.dot(XTX_inv, X.T)
w = np.dot(XTXiXT, y)
print(f"The w vector is: {w}")
print(f"[ANSWER-7] The first element of w is: {w[0]}")