# Import NumPy and matplotlib.pyplot
import numpy as np
import matplotlib.pyplot as plt
# Read the age, BMI and insurance-charge columns into separate arrays.
# The file is parsed once: usecols picks column indices 0, 2 and 6, delimiter
# is the field separator, skiprows=1 skips the first line of the file, and
# unpack=True returns one array per selected column (in usecols order).
filename = 'insurance.csv'
age, bmi, charges = np.loadtxt(
    filename, usecols=(0, 2, 6), delimiter=",", skiprows=1, unpack=True
)
# Print each array to check what was read.
print(f"Ages are {age}")
print(f"BMIs are {bmi}")
print(f"Charges are {charges}")
# Print the number of data instances in each array.
print(f"The number of data instances in Age are {len(age)}")
print(f"The number of data instances in BMI are {len(bmi)}")
print(f"The number of data instances in Charges are {len(charges)}")
# Scatter plot of charges (y-axis) vs age (x-axis) with axis labels and title.
# Each plot gets its own figure; without plt.figure() the second scatter would
# be drawn on the same axes and its labels/title would overwrite the first.
plt.figure()
plt.scatter(age, charges)
plt.xlabel("Age (in years)")
plt.ylabel("Charges (in dollars)")
plt.title("Age vs Insurance Charges")
plt.show()
# Scatter plot of charges (y-axis) vs BMI (x-axis) with axis labels and title.
plt.figure()
plt.scatter(bmi, charges)
plt.xlabel("BMI")
plt.ylabel("Charges (in dollars)")
plt.title("BMI vs Insurance Charges")
plt.show()
# Import NumPy and matplotlib.pyplot
import numpy as np
import matplotlib.pyplot as plt
# Read the BMI and insurance-charge columns into separate arrays.
# A single loadtxt call parses the file once; unpack=True returns one array
# per selected column, in the order given by usecols (indices 2 and 6).
filename = 'insurance.csv'
bmi, charges = np.loadtxt(
    filename, usecols=(2, 6), delimiter=",", skiprows=1, unpack=True
)
# Print both arrays to check what was read.
print(f"The BMIs are {bmi}")
print(f"Insurance charges are {charges}")
# Fit a line (degree-1 polynomial) to charges as a function of BMI using
# least squares, and wrap the coefficients in a callable polynomial.
coeff_line = np.polyfit(bmi, charges, 1)
f = np.poly1d(coeff_line)
print(f"The line fit for linear regression is {f}")
# Plot the data and the fitted line on the same graph.
# Open a new figure first so this plot is not drawn over an earlier one.
plt.figure()
plt.scatter(bmi, charges)
# A straight line is fully determined by its two end points, so evaluating
# the fit at the smallest and largest BMI is enough to draw it.
bmi_min = np.min(bmi)
bmi_max = np.max(bmi)
x_line = np.linspace(bmi_min, bmi_max, 2)
y_line = f(x_line)
plt.plot(x_line, y_line, "r")
plt.xlabel("BMI")
plt.ylabel("Charges (in dollars)")
plt.title("BMI vs. Insurance Charges")
plt.show()
# Fit a parabola (degree-2 polynomial) to charges as a function of BMI.
coeff_quad = np.polyfit(bmi, charges, 2)
p = np.poly1d(coeff_quad)
# poly1d prints over two lines (exponents above the coefficients), so it is
# printed separately from the heading.
print("The parabola fit to data is:")
print(p)
# Plot the data and the fitted parabola on the same graph.
# Open a new figure first so this plot is not drawn over an earlier one.
plt.figure()
plt.scatter(bmi, charges)
bmi_min = np.min(bmi)
bmi_max = np.max(bmi)
# Sample the curve densely: with only 3 points the parabola would be drawn
# as two straight segments instead of a smooth curve.
x_par = np.linspace(bmi_min, bmi_max, 200)
y_par = p(x_par)
plt.plot(x_par, y_par, "r")
plt.xlabel("BMI")
plt.ylabel("Charges (in dollars)")
plt.title("BMI vs Insurance Charges")
plt.show()