# Import NumPy and matplotlib.pyplot
import numpy as np
import matplotlib.pyplot as plt
# Read the age, BMI and insurance-charge columns into separate arrays.
# usecols = indices of the columns to load, delimiter = string that separates
# the fields, skiprows = number of lines at the start of the file to skip,
# unpack = return one array per selected column instead of one 2-D array.
# The file is parsed a single time rather than once per column.
filename = 'insurance.csv'
age, bmi, charges = np.loadtxt(
    filename, usecols=(0, 2, 6), delimiter=",", skiprows=1, unpack=True
)
# Print out to check
print(f"Ages are {age}")
print(f"BMIs are {bmi}")
print(f"Charges are {charges}")
Ages are [19. 18. 28. ... 18. 21. 61.]
BMIs are [27.9 33.77 33. ... 36.85 25.8 29.07]
Charges are [16884.924 1725.5523 4449.462 ... 1629.8335 2007.945 29141.3603]
# Report how many records were loaded into each array
n_age, n_bmi, n_charges = len(age), len(bmi), len(charges)
print(f"The number of data instances in Age are {n_age}")
print(f"The number of data instances in BMI are {n_bmi}")
print(f"The number of data instances in Charges are {n_charges}")
The number of data instances in Age are 1338
The number of data instances in BMI are 1338
The number of data instances in Charges are 1338
# Scatter plot charges (y-axis) vs age (x-axis) with axis labels and a title.
# A fresh figure is opened first and shown at the end so that, when this file
# runs as a plain script, no other plotting call draws onto these axes.
plt.figure()
plt.scatter(age, charges)
plt.xlabel("Age (in years)")
plt.ylabel("Charges (in dollars)")
plt.title("Age vs Insurance Charges")
plt.show()
# Scatter plot charges (y-axis) vs BMI (x-axis) with axis labels and a title.
# A fresh figure is opened first and shown at the end so that this plot is
# not drawn on top of whatever figure happens to be current.
plt.figure()
plt.scatter(bmi, charges)
plt.xlabel("BMI")
plt.ylabel("Charges (in dollars)")
plt.title("BMI vs Insurance Charges")
plt.show()
# Import NumPy and matplotlib.pyplot
import numpy as np
import matplotlib.pyplot as plt
# Read the BMI and insurance-charge columns into separate arrays.
# Both columns come from a single pass over the file: usecols selects the
# column indices and unpack=True returns one array per selected column.
filename = 'insurance.csv'
bmi, charges = np.loadtxt(
    filename, usecols=(2, 6), delimiter=",", skiprows=1, unpack=True
)
# Print out to check
print(f"The BMIs are {bmi}")
print(f"Insurance charges are {charges}")
The BMIs are [27.9 33.77 33. ... 36.85 25.8 29.07]
Insurance charges are [16884.924 1725.5523 4449.462 ... 1629.8335 2007.945 29141.3603]
# Least-squares straight line (degree-1 polynomial) through charges vs BMI
coeff_line = np.polyfit(bmi, charges, deg=1)
# Wrap the coefficients in a callable polynomial for evaluation and display
f = np.poly1d(coeff_line)
print(f"The line fit for linear regression is {f}")
The line fit for linear regression is
393.9 x + 1193
# Plot the data and the fitted line on the same graph.
# A fresh figure is opened first and shown at the end so that this plot is
# not drawn on top of whatever figure happens to be current.
plt.figure()
plt.scatter(bmi, charges)
bmi_min = np.min(bmi)
bmi_max = np.max(bmi)
# Two end points are enough to draw a straight line across the BMI range
x_line = np.linspace(bmi_min, bmi_max, 2)
y_line = f(x_line)
plt.plot(x_line, y_line, "r")
plt.xlabel("BMI")
plt.ylabel("Charges (in dollars)")
plt.title("BMI vs. Insurance Charges")
plt.show()
# Quadratic (degree-2 polynomial) least-squares fit of charges vs BMI
coeff_quad = np.polyfit(bmi, charges, deg=2)
# Callable polynomial built from the fitted coefficients
p = np.poly1d(coeff_quad)
print("The parabola fit to data is:")
print(p)
The parabola fit to data is:
        2
-6.662 x + 814 x - 5177
# Plot the data and the fitted parabola on the same graph.
# A fresh figure is opened first and shown at the end so that this plot is
# not drawn on top of whatever figure happens to be current.
plt.figure()
plt.scatter(bmi, charges)
bmi_min = np.min(bmi)
bmi_max = np.max(bmi)
# Sample the BMI range densely: with only 3 points the curve would be drawn
# as two straight segments instead of a smooth parabola.
x_par = np.linspace(bmi_min, bmi_max, 200)
y_par = p(x_par)
plt.plot(x_par, y_par, "r")
plt.xlabel("BMI")
# Closing parenthesis restored in the axis label
plt.ylabel("Charges (in dollars)")
plt.title("BMI vs Insurance Charges")
plt.show()