# Import NumPy and matplotlib.pyplot
import numpy as np
import matplotlib.pyplot as plt
# Read the age, BMI and insurance-charge columns (CSV columns 0, 2 and 6)
# into separate 1-D arrays, skipping the header row, then print each array
# as a quick sanity check.
filename = 'insurance.csv'
# Parse the file once instead of once per column: unpack=True returns one
# array per requested column, in the order given by usecols.
age, bmi, charge = np.loadtxt(
    filename, skiprows=1, usecols=(0, 2, 6), delimiter=',', unpack=True
)
print ("Age Data")
print (age)
print ("BMI Data")
print (bmi)
print ("Insurance Charge")
print (charge)
Age Data
[19. 18. 28. ... 18. 21. 61.]
BMI Data
[27.9 33.77 33. ... 36.85 25.8 29.07]
Insurance Charge
[16884.924 1725.5523 4449.462 ... 1629.8335 2007.945 29141.3603]
# Print the number of data instances, taken as the length of the age array
# (one element per data row that was loaded).
print(f"We have {len(age)} data entries")
We have 1338 data entries
# Scatter plot of insurance charge against age, with axis labels and a title.
# Draw on a fresh figure and render it right away so that later plotting
# calls do not add points or overwrite labels on these axes.
plt.figure()
plt.scatter (age, charge)
plt.xlabel ("AGE")
plt.ylabel ("Insurance Charge")
plt.title ("Age vs Insurance")
plt.show()
# Scatter plot of insurance charge against BMI, with axis labels and a title.
# A new figure is opened first so this plot never lands on axes left over
# from an earlier plot, and it is rendered before any further plotting.
plt.figure()
plt.scatter (bmi, charge)
plt.xlabel ("BMI")
plt.ylabel ("Insurance Charge")
plt.title ("BMI vs Insurance")
plt.show()
# Import NumPy and matplotlib.pyplot
import numpy as np
import matplotlib.pyplot as plt
# Read the BMI and insurance-charge columns (CSV columns 2 and 6) into
# separate 1-D arrays, skipping the header row, then print both to check.
filename = 'insurance.csv'
# Parse the file once; unpack=True yields one array per requested column,
# in the order given by usecols.
bmi, charge = np.loadtxt(
    filename, skiprows=1, usecols=(2, 6), delimiter=',', unpack=True
)
print ("BMI Data")
print (bmi)
print ("Insurance Charge")
print (charge)
BMI Data
[27.9 33.77 33. ... 36.85 25.8 29.07]
Insurance Charge
[16884.924 1725.5523 4449.462 ... 1629.8335 2007.945 29141.3603]
# Least-squares fit of a degree-1 polynomial (straight line) to charge as a
# function of BMI. np.polyfit returns coefficients highest power first, so
# index 0 is the slope and index 1 is the intercept.
line_coeff = np.polyfit(x=bmi, y=charge, deg=1)
print (f"The equation of the line is {line_coeff[0]}x + {line_coeff[1]}" )
The equation of the line is 393.87303079739513x + 1192.937208961152
# Plot the raw data (blue dots) and the fitted line (red) on the same axes.
f = np.poly1d(line_coeff)  # callable polynomial built from the fit coefficients
# Start from a fresh figure so nothing drawn earlier ends up on this plot.
plt.figure()
plt.plot(bmi, charge, 'bo')
plt.xlabel("BMI")
plt.ylabel("Insurance Charge")
# Use NumPy's vectorized reductions rather than the Python built-ins, which
# walk the array one element at a time.
bmi_min = np.min(bmi)
bmi_max = np.max(bmi)
# Evaluate the line on 100 evenly spaced points spanning the observed BMI range.
x = np.linspace(bmi_min, bmi_max, 100)
plt.plot(x, f(x), 'r')
plt.title("Linear regression fit to data")
plt.show()
# Fit a parabola (degree-2 polynomial) to charge as a function of BMI.
coeffs_quad = np.polyfit (bmi,charge,2)
# np.polyfit orders coefficients from the highest power down, so index 0
# multiplies x^2, index 1 multiplies x, and index 2 is the constant term.
print (
    f"Quadratic polynomial is {coeffs_quad[0]}*x^2 +{coeffs_quad[1]}*x +"
    f"{coeffs_quad[2]}"
)
Quadratic polynomial is -5177.034125706157*x^2 +813.9749575731079*x +-6.662322630063166
# Plot the raw data (blue dots) and the fitted parabola on the same axes.
g = np.poly1d(coeffs_quad)  # callable polynomial built from the fit coefficients
# Start from a fresh figure so nothing drawn earlier ends up on this plot.
plt.figure()
plt.plot(bmi, charge, 'bo')
plt.xlabel("BMI")
plt.ylabel("Insurance Charge")
plt.title ("Quadratic Polynomial fit to data")
# Evaluate the parabola on a fixed grid from 0 to 100 (np.linspace defaults
# to 50 points).
x_eval = np.linspace(0, 100)
y_eval = g(x_eval)
plt.plot(x_eval, y_eval)
# plt.show must be called; the bare name `plt.show` is a no-op expression.
plt.show()