import os
import urllib.request

import numpy as np
import pandas as pd
# Report the installed NumPy and pandas versions (answers 1 and 2).
numpy_version = np.__version__
pandas_version = pd.__version__
print(f"[ANSWER-1] The version of NumPy is: {numpy_version}")
print(f"[ANSWER-2] The version of Pandas is: {pandas_version}")
# Fetch the dataset only when no local copy exists, so re-runs skip the
# download. urllib is used instead of the IPython-only `!wget` shell escape,
# which is a syntax error when this file is run as a plain Python script.
if not os.path.isfile("data.csv"):
    urllib.request.urlretrieve(
        "https://raw.githubusercontent.com/alexeygrigorev/mlbookcamp-code/master/chapter-02-car-price/data.csv",
        "data.csv",
    )
# Load the dataset from the local CSV and preview the first rows.
df = pd.read_csv("data.csv")
df.head()

# Inspect the raw 'Make' values.
df['Make'].unique()

# Normalize 'Make': lowercase, with spaces and hyphens turned into underscores.
make_lowercase = df['Make'].str.lower()
df['Make'] = make_lowercase.str.replace(" ", "_").str.replace("-", "_")
df['Make'].unique()

# Mean MSRP over the rows whose normalized 'Make' is 'bmw'.
bmw_mean_price = df.loc[df['Make'] == 'bmw', 'MSRP'].mean()
print(f"[ANSWER-3] Average price of BMW cars is {bmw_mean_price}")

# Number of missing 'Engine HP' values among rows with Year >= 2015.
missing_hp_count = df.loc[df['Year'] >= 2015, 'Engine HP'].isnull().sum()
print(f"[ANSWER-4] There are {missing_hp_count} missing values in 'Engine HP'")
# Compare the mean of 'Engine HP' before and after filling its missing values.
# The fill value is the rounded mean itself, and both means are rounded to
# integers before being compared.
mean_hp_before = round(df['Engine HP'].mean())
df['Engine HP Filled'] = df['Engine HP'].fillna(mean_hp_before)
mean_hp_after = round(df['Engine HP Filled'].mean())
mean_hp_unchanged = mean_hp_before == mean_hp_after
print(f"Mean HP before ({mean_hp_before}) and Mean HP after ({mean_hp_after}) are equal?: {mean_hp_unchanged}")

# Derive the answer from the comparison instead of hard-coding it, so the
# printed conclusion can never contradict the computed values.
if mean_hp_unchanged:
    print("[ANSWER-5] No, the HP mean value has not changed")
else:
    print("[ANSWER-5] Yes, the HP mean value has changed")
# Build the feature matrix from the unique Rolls-Royce rows.
# A single .loc selection followed by a non-inplace drop_duplicates() avoids
# mutating a frame derived by chained indexing, which can raise pandas'
# SettingWithCopyWarning.
rr_columns = ['Engine HP', 'Engine Cylinders', 'highway MPG']
rr_df = df.loc[df['Make'] == 'rolls_royce', rr_columns].drop_duplicates()
X = rr_df.values

# Gram matrix X^T X and its inverse.
XTX = X.T.dot(X)
XTX_inv = np.linalg.inv(XTX)
print(f"[ANSWER-6] The sum of all elements is: {np.sum(XTX_inv)}")

# Fixed target vector; it must supply exactly one value per row of X.
y = np.array([1000, 1100, 900, 1200, 1000, 850, 1300])
if X.shape[0] != y.shape[0]:
    raise ValueError(
        f"X has {X.shape[0]} rows but y has {y.shape[0]} values; "
        "they must match to compute w"
    )

# w = (X^T X)^-1 X^T y
XTXiXT = XTX_inv.dot(X.T)
w = XTXiXT.dot(y)
print(f"The w vector is: {w}")
print(f"[ANSWER-7] The first element of w is: {w[0]}")