# Import NumPy and matplotlib.pyplot
import numpy as np
import matplotlib.pyplot as plt
# Load age, BMI and insurance charges (CSV columns 0, 2 and 6) into three
# separate arrays, skipping the first line of the file, then print each array
# as a quick sanity check.
filename = 'insurance.csv'
list_age, list_bmi, list_charges = (
    np.loadtxt(filename, usecols=column, skiprows=1, delimiter=',')
    for column in (0, 2, 6)
)
# sep='\n' puts each array on its own line, same as three separate print calls.
print(list_age, list_bmi, list_charges, sep='\n')
[19. 18. 28. ... 18. 21. 61.]
[27.9 33.77 33. ... 36.85 25.8 29.07]
[16884.924 1725.5523 4449.462 ... 1629.8335 2007.945 29141.3603]
# Show the three text columns with their entry counts, then total the entry
# counts over all seven columns loaded from the file.
filename = 'insurance.csv'

# Text columns (CSV columns 1, 4 and 5) are read as strings (dtype 'U10'),
# skipping the first line of the file.
gender_inc, smoker_inc, region_inc = (
    np.loadtxt(filename, dtype='U10', usecols=column, delimiter=',', skiprows=1)
    for column in (1, 4, 5)
)

print("", gender_inc)
print(f"# of sex data points = {len(gender_inc)}")
print("", smoker_inc)
print(f"# of smoker data points = {len(smoker_inc)}")
print("", region_inc)
print(f"# of region data points = {len(region_inc)}")

# Numeric columns (CSV columns 0, 2, 3 and 6) use loadtxt's default dtype.
list_age, list_bmi, list_children, list_charges = (
    np.loadtxt(filename, usecols=column, skiprows=1, delimiter=',')
    for column in (0, 2, 3, 6)
)

# NOTE(review): this adds up the length of every column, so it counts
# individual values (7 x rows) rather than rows -- confirm that this is what
# "data instances" is meant to be.
data_instances = sum(
    len(column)
    for column in (
        gender_inc, smoker_inc, region_inc,
        list_age, list_bmi, list_charges, list_children,
    )
)
print(f'total number of data instances = {data_instances}')
['female' 'male' 'male' ... 'female' 'female' 'female']
# of sex data points = 1338
['yes' 'no' 'no' ... 'no' 'no' 'yes']
# of smoker data points = 1338
['southwest' 'southeast' 'southeast' ... 'southeast' 'southwest'
'northwest']
# of region data points = 1338
total number of data instances = 9366
# Charges (y axis) against age (x axis), drawn with the 'bo' format string
# (blue circle markers), with a title and both axes labelled.
plt.plot(list_age, list_charges, 'bo')
plt.title("Charges vs Age")
plt.xlabel("Age")
plt.ylabel("Charges")
plt.show()
# Charges (y axis) against BMI (x axis), drawn with the 'bo' format string
# (blue circle markers), with a title and both axes labelled.
plt.plot(list_bmi, list_charges, 'bo')
plt.title("Charges vs BMI")
plt.xlabel("BMI")
plt.ylabel("Charges")
plt.show()
# Import NumPy and matplotlib.pyplot
import numpy as np
import matplotlib.pyplot as plt
# Load BMI and insurance charges (CSV columns 2 and 6) into two separate
# arrays, skipping the first line of the file, then print both as a check.
filename = 'insurance.csv'
list_bmi, list_charges = (
    np.loadtxt(filename, usecols=column, skiprows=1, delimiter=',')
    for column in (2, 6)
)
# sep='\n' puts each array on its own line, same as two separate print calls.
print(list_bmi, list_charges, sep='\n')
[27.9 33.77 33. ... 36.85 25.8 29.07]
[16884.924 1725.5523 4449.462 ... 1629.8335 2007.945 29141.3603]
# Least-squares straight-line fit of charges against BMI.
# np.polyfit returns coefficients from the highest power down, so for a
# degree-1 fit line_coeff[0] is the slope and line_coeff[1] the intercept.
line_coeff = np.polyfit(list_bmi, list_charges, deg=1)
print(f"equation of the line is {line_coeff[0]}x + {line_coeff[1]}")
equation of the line is 393.87303079739513x + 1192.937208961152
# Overlay the least-squares straight line on the charges-vs-BMI data.
# The fit is recomputed here, so line_coeff[0] is the slope and
# line_coeff[1] the intercept (np.polyfit orders highest power first).
line_coeff = np.polyfit(list_bmi, list_charges, deg=1)
print(f"equation of the line is {line_coeff[0]}x + {line_coeff[1]}")

# Wrap the coefficients in a callable polynomial for evaluation on a grid.
f = np.poly1d(line_coeff)

# 100 evenly spaced BMI values spanning the observed range.
BMI_min, BMI_max = min(list_bmi), max(list_bmi)
x = np.linspace(BMI_min, BMI_max, num=100)

plt.plot(list_bmi, list_charges, 'bo')  # data points: blue circles
plt.plot(x, f(x), 'r')                  # fitted line: red
plt.title("Charges vs BMI")
plt.xlabel("BMI")
plt.ylabel("Charges")
plt.show()
equation of the line is 393.87303079739513x + 1192.937208961152
# Fit a parabola (degree-2 polynomial) to charges as a function of BMI.
# np.polyfit returns coefficients from the highest power down, so a degree-2
# fit yields three values: line_coeff[0] multiplies x^2, line_coeff[1]
# multiplies x, and line_coeff[2] is the constant term.
line_coeff = np.polyfit(list_bmi, list_charges, 2)
# Print every term with its correct power. The earlier message called this a
# "line", attached the x^2 coefficient to x, and omitted the constant term.
print(
    f"equation of the parabola is "
    f"{line_coeff[0]}x^2 + {line_coeff[1]}x + {line_coeff[2]}"
)
equation of the line is -6.662322630063166x + 813.9749575731079
# Overlay the least-squares parabola on the charges-vs-BMI data.
# np.polyfit returns coefficients from the highest power down, so a degree-2
# fit yields three values: line_coeff[0] multiplies x^2, line_coeff[1]
# multiplies x, and line_coeff[2] is the constant term.
line_coeff = np.polyfit(list_bmi, list_charges, 2)
# Print every term with its correct power. The earlier message called this a
# "line", attached the x^2 coefficient to x, and omitted the constant term.
print(
    f"equation of the parabola is "
    f"{line_coeff[0]}x^2 + {line_coeff[1]}x + {line_coeff[2]}"
)

# Wrap the coefficients in a callable polynomial for evaluation on a grid.
f = np.poly1d(line_coeff)

# 100 evenly spaced BMI values spanning the observed range.
BMI_min = min(list_bmi)
BMI_max = max(list_bmi)
x = np.linspace(BMI_min, BMI_max, 100)

plt.plot(list_bmi, list_charges, 'bo')  # data points: blue circles
plt.plot(x, f(x), 'r')                  # fitted parabola: red
plt.ylabel("Charges")
plt.xlabel("BMI")
plt.title("Charges vs BMI")
plt.show()
equation of the line is -6.662322630063166x + 813.9749575731079