# Import NumPy and matplotlib.pyplot
import numpy as np
import matplotlib.pyplot as plt
# Load the numeric columns used by the first plots: age (column 0),
# BMI (column 2) and insurance charges (column 6); the header row is skipped.
# Passing a tuple of columns together with unpack=True parses the file once
# and returns one 1-D array per requested column, in the order requested,
# rather than re-reading the whole file for every column.
filename = 'insurance.csv'
list_age, list_bmi, list_charges = np.loadtxt(
    filename, usecols=(0, 2, 6), skiprows=1, delimiter=',', unpack=True
)
# Echo the arrays as a quick sanity check of the parse.
print(list_age)
print(list_bmi)
print(list_charges)
# Load every column of the file and report how many data instances (rows)
# it contains.
filename = 'insurance.csv'

# Text columns: sex (1), smoker (4) and region (5), read as strings of at most
# 10 characters.  One loadtxt call covers all three; unpack=True yields one
# 1-D array per column in the order listed in usecols.
gender_inc, smoker_inc, region_inc = np.loadtxt(
    filename, dtype='U10', usecols=(1, 4, 5), delimiter=',', skiprows=1,
    unpack=True,
)
print("", gender_inc)
print(f"# of sex data points = {len(gender_inc)}")
print("", smoker_inc)
print(f"# of smoker data points = {len(smoker_inc)}")
print("", region_inc)
print(f"# of region data points = {len(region_inc)}")

# Numeric columns: age (0), BMI (2), children (3) and charges (6), again
# parsed in a single pass over the file.
list_age, list_bmi, list_children, list_charges = np.loadtxt(
    filename, usecols=(0, 2, 3, 6), skiprows=1, delimiter=',', unpack=True
)

# A data instance is one row of the file, so the count is the length of a
# single column.  Adding up the lengths of all seven columns would count every
# row seven times.
data_instances = len(list_age)
print(f'total number of data instances = {data_instances}')
# Scatter of insurance charges against age: blue circle markers on the
# current axes, with both axes labelled and a title, then display the figure.
age_axes = plt.gca()
age_axes.plot(list_age, list_charges, 'bo')
age_axes.set_xlabel("Age")
age_axes.set_ylabel("Charges")
age_axes.set_title("Charges vs Age")
plt.show()
# Scatter of insurance charges against BMI: blue circle markers on the
# current axes, with both axes labelled and a title, then display the figure.
bmi_axes = plt.gca()
bmi_axes.plot(list_bmi, list_charges, 'bo')
bmi_axes.set_xlabel("BMI")
bmi_axes.set_ylabel("Charges")
bmi_axes.set_title("Charges vs BMI")
plt.show()
# Import NumPy and matplotlib.pyplot
import numpy as np
import matplotlib.pyplot as plt
# Load BMI (column 2) and insurance charges (column 6) for the curve fits,
# skipping the header row.  Both columns come from one loadtxt call so the
# file is parsed once; unpack=True returns one 1-D array per column in the
# order given in usecols.
filename = 'insurance.csv'
list_bmi, list_charges = np.loadtxt(
    filename, usecols=(2, 6), skiprows=1, delimiter=',', unpack=True
)
# Echo the arrays as a quick sanity check of the parse.
print(list_bmi)
print(list_charges)
# Least-squares straight-line fit of charges against BMI.  polyfit returns
# the coefficients highest power first, i.e. [slope, intercept].
line_coeff = np.polyfit(list_bmi, list_charges, deg=1)
print(f"equation of the line is {line_coeff[0]}x + {line_coeff[1]}")
# Refit the straight line, report its equation, and draw it over the data.
line_coeff = np.polyfit(list_bmi, list_charges, deg=1)
print(f"equation of the line is {line_coeff[0]}x + {line_coeff[1]}")

# Wrap the coefficients in a callable polynomial and sample it at 100 evenly
# spaced BMI values spanning the observed range.
f = np.poly1d(line_coeff)
BMI_min, BMI_max = min(list_bmi), max(list_bmi)
x = np.linspace(BMI_min, BMI_max, num=100)

line_axes = plt.gca()
line_axes.plot(list_bmi, list_charges, 'bo')  # raw data: blue circles
line_axes.plot(x, f(x), 'r')                  # fitted line: red
line_axes.set_xlabel("BMI")
line_axes.set_ylabel("Charges")
line_axes.set_title("Charges vs BMI")
plt.show()
# Fit a parabola (degree-2 polynomial) to charges against BMI.  polyfit
# returns the coefficients highest power first, so the three entries are the
# x^2, x and constant terms; all three are needed to state the equation.
line_coeff = np.polyfit(list_bmi, list_charges, 2)
print(
    f"equation of the parabola is "
    f"{line_coeff[0]}x^2 + {line_coeff[1]}x + {line_coeff[2]}"
)
# Refit the parabola, report its equation, and draw it over the data.
# polyfit returns the coefficients highest power first (x^2, x, constant), so
# the printed equation includes all three terms.
line_coeff = np.polyfit(list_bmi, list_charges, 2)
print(
    f"equation of the parabola is "
    f"{line_coeff[0]}x^2 + {line_coeff[1]}x + {line_coeff[2]}"
)

# Wrap the coefficients in a callable polynomial and sample it at 100 evenly
# spaced BMI values spanning the observed range so the curve looks smooth.
f = np.poly1d(line_coeff)
BMI_min = min(list_bmi)
BMI_max = max(list_bmi)
x = np.linspace(BMI_min, BMI_max, 100)

plt.plot(list_bmi, list_charges, 'bo')  # raw data: blue circles
plt.plot(x, f(x), 'r')                  # fitted parabola: red
plt.ylabel("Charges")
plt.xlabel("BMI")
plt.title("Charges vs BMI")
plt.show()