# Import libraries
import numpy as np
import matplotlib.pyplot as plt
# Exact values, entered by hand, for comparison.
# One assignment per line; names follow <country>_<sex>_<year>.
#
# Men
dutch_man_1925 = 174.83
us_man_1925 = 174.53
dutch_man_1955 = 180.23
us_man_1955 = 177.22
dutch_man_1995 = 182.54
us_man_1995 = 177.16
#
# Women
dutch_woman_1925 = 162.2
us_woman_1925 = 160.97
dutch_woman_1955 = 167.11
us_woman_1955 = 163.54
dutch_woman_1995 = 168.73
us_woman_1995 = 163.56
# Load file columns 2 and 4 (stored as dutchmen / usmen), skipping the first
# two lines of the file, and echo both arrays.
#
# A single loadtxt call with unpack=True hands back each selected column as
# its own 1-D array, so the file is parsed once rather than once per column.
dutchmen, usmen = np.loadtxt(
    'human_heights.txt', skiprows=2, usecols=(2, 4), unpack=True
)
print('Dutch Men Height', dutchmen)
print('US Men Height', usmen)
# Read in the data and plot; add axes labels, plot title and legend
# Note: to add a legend, pass label=... when plotting (e.g., label='Dutch Men')
# and then call legend().
#
# Columns 0, 2 and 4 are read in one pass; unpack=True returns them as three
# separate 1-D arrays instead of parsing the file once per column.
year, dutchmen, usmen = np.loadtxt(
    'human_heights.txt', skiprows=2, usecols=(0, 2, 4), unpack=True
)
plt.plot(year, dutchmen, 'bo', label='Dutch Men')
plt.plot(year, usmen, 'ro', label='US Men')
# Labels are set through the Axes object.  Writing `plt.xlabel = "Years"`
# does not label anything: it replaces the pyplot function with a string.
axes = plt.gca()
axes.set_xlabel("Years")
axes.set_ylabel("Height")
# The plot compares Dutch men with US men (the old title named Dutch men twice).
axes.set_title("Dutch Men vs US Men Height")
axes.legend()
# Linear regression fit for both Dutch and U.S.; plot and print out the line
#
# Work on a fresh figure and draw the data points exactly once.  Re-plotting
# the same points on the current axes for every fit stacks identical markers
# and repeats the 'Dutch Men' / 'US Men' legend entries.
fig, ax = plt.subplots()
ax.plot(year, dutchmen, 'bo', label='Dutch Men')
ax.plot(year, usmen, 'ro', label='US Men')

# np.polyfit returns the coefficients highest power first: [slope, intercept].
line1 = np.polyfit(year, dutchmen, 1)
line2 = np.polyfit(year, usmen, 1)
f1 = np.poly1d(line1)
f2 = np.poly1d(line2)
print(f"Dutch men linear fit: y = {line1[0]}x + {line1[1]}")
print(f"US men linear fit: y = {line2[0]}x + {line2[1]}")

# Ten sample points from 2000 down to 1900 are enough to draw a straight line.
xx = np.linspace(2000, 1900, 10)
ax.plot(xx, f1(xx), 'b', label='Dutch Men linear fit')
yy = f2(xx)
ax.plot(xx, yy, 'r', label='US Men linear fit')

# Labels go through the Axes object; assigning to plt.xlabel / plt.ylabel
# would overwrite the pyplot functions instead of labelling the axes.
ax.set_xlabel("Years")
ax.set_ylabel("Height")
ax.set_title("Dutch Men vs US Men Height")
# Build the legend last so the fitted lines are listed as well.
ax.legend()
plt.show()
# Function for calculating the variance of data about a fitted polynomial
#
# Input: the x and y arrays for the data points, coefficients of the
#        polynomial found using regression (highest power first)
#
# Output: variance (mean of the squared vertical distances)
def calculate_variance(x, y, coeff):
    """Return the mean squared vertical distance between data and a polynomial.

    Parameters
    ----------
    x, y : sequences of numbers with the same length
        Coordinates of the data points.
    coeff : sequence of numbers
        Polynomial coefficients, highest power first (the order np.polyfit
        returns).  Any degree is accepted, not only 1 and 2.

    Returns
    -------
    float
        sum((y[i] - p(x[i]))**2) / n, where p is the polynomial and n the
        number of points.

    Raises
    ------
    ValueError
        If there are no data points or x and y differ in shape.
    """
    x_values = np.asarray(x, dtype=float)
    y_values = np.asarray(y, dtype=float)
    if x_values.size == 0:
        # Dividing by n == 0 would otherwise fail with ZeroDivisionError.
        raise ValueError("calculate_variance needs at least one data point")
    if x_values.shape != y_values.shape:
        raise ValueError("x and y must have the same length")
    # np.polyval evaluates a polynomial of any degree, so no per-degree
    # branches are needed.
    residuals = y_values - np.polyval(coeff, x_values)
    return float(np.mean(residuals ** 2))
#
# Variance of the Dutch men's heights about their linear fit.
# line1 was fitted as polyfit(year, dutchmen, 1), so x must be `year` and y
# must be `dutchmen`; passing the two height arrays as x and y compares
# unrelated quantities.
var = calculate_variance(year, dutchmen, line1)
print(' The variance of the linear fit to data is',var)
print(' The standard deviation of the linear fit to data is',np.sqrt(var))
# NOTE: this is a second definition of calculate_variance in this file; it
# re-binds the name to a function with the same (x, y, coeff) signature.
def calculate_variance(x, y, coeff):
    """Return the mean squared vertical distance between data and a polynomial.

    Parameters
    ----------
    x, y : sequences of numbers with the same length
        Coordinates of the data points.
    coeff : sequence of numbers
        Polynomial coefficients, highest power first (the order np.polyfit
        returns).  Any degree is accepted, not only 1 and 2.

    Returns
    -------
    float
        sum((y[i] - p(x[i]))**2) / n, where p is the polynomial and n the
        number of points.

    Raises
    ------
    ValueError
        If there are no data points or x and y differ in shape.
    """
    x_values = np.asarray(x, dtype=float)
    y_values = np.asarray(y, dtype=float)
    if x_values.size == 0:
        # Dividing by n == 0 would otherwise fail with ZeroDivisionError.
        raise ValueError("calculate_variance needs at least one data point")
    if x_values.shape != y_values.shape:
        raise ValueError("x and y must have the same length")
    # np.polyval evaluates a polynomial of any degree, so no per-degree
    # branches are needed.
    residuals = y_values - np.polyval(coeff, x_values)
    return float(np.mean(residuals ** 2))
#
# Variance of the US men's heights about their linear fit.
# line2 was fitted as polyfit(year, usmen, 1), so x must be `year` and y must
# be `usmen`; using dutchmen as x evaluates the line at heights, not years.
var = calculate_variance(year, usmen, line2)
print(' The variance of the linear fit to data is',var)
print(' The standard deviation of the linear fit to data is',np.sqrt(var))
# Quadratic regression fit for Dutch and U.S.; plot and print out the parabolas
#
# np.polyfit returns the coefficients highest power first, so index 0 is the
# x^2 coefficient, index 1 the x coefficient and index 2 the constant.
parabola_coeff1 = np.polyfit(year, dutchmen, 2)
print(
    "The equation of the parabola using regression is "
    f"{parabola_coeff1[0]}x^2 +{parabola_coeff1[1]}x + {parabola_coeff1[2]}"
)
parabola_coeff2 = np.polyfit(year, usmen, 2)
print(
    "The equation of the parabola using regression is "
    f"{parabola_coeff2[0]}x^2 +{parabola_coeff2[1]}x + {parabola_coeff2[2]}"
)

# Draw the data and both parabolas on a figure of their own.
fig, ax = plt.subplots()
ax.plot(year, dutchmen, 'bo', label='Dutch Men')
ax.plot(year, usmen, 'ro', label='US Men')
#
# Create data to plot parabola
# The quadratic models get their own names (quad_f1 / quad_f2).  Re-binding
# f1 / f2 here would silently replace the linear models that were stored
# under those names by the linear-regression step.
xx1 = np.linspace(2000, 1900, 10)
quad_f1 = np.poly1d(parabola_coeff1)
yy1 = quad_f1(xx1)
ax.plot(xx1, yy1, 'b', label='Dutch Men quadratic fit')
xx2 = np.linspace(2000, 1900, 10)
quad_f2 = np.poly1d(parabola_coeff2)
yy2 = quad_f2(xx2)
ax.plot(xx2, yy2, 'r', label='US Men quadratic fit')

# Labels go through the Axes object; assigning to plt.xlabel / plt.ylabel
# would overwrite the pyplot functions instead of labelling the axes.
ax.set_xlabel("Years")
ax.set_ylabel("Height")
ax.set_title("Dutch Men vs US Men Height")
# Build the legend last so the parabolas are listed as well.
ax.legend()
plt.show()
# Calculate variance for the quadratic fits
#
# np.polyfit returns the coefficients highest power first: index 0 belongs to
# x^2 and index 2 is the constant term.  The printed polynomial must follow
# that order, otherwise the constant is shown as the x^2 coefficient.
coeffs_quad1 = np.polyfit(year, dutchmen, 2)
print(
    f"Quadratic polynomial is {coeffs_quad1[0]}*x^2 +{coeffs_quad1[1]}*x +"
    f"{coeffs_quad1[2]}"
)
#
# g1 is the callable form of the Dutch quadratic model.
g1 = np.poly1d(coeffs_quad1)
var_quad1 = calculate_variance(year, dutchmen, coeffs_quad1)
print("The variance for a quadratic fit to data is", var_quad1)
coeffs_quad2 = np.polyfit(year, usmen, 2)
print(
    f"Quadratic polynomial is {coeffs_quad2[0]}*x^2 +{coeffs_quad2[1]}*x +"
    f"{coeffs_quad2[2]}"
)
#
# g2 is the callable form of the US quadratic model.
g2 = np.poly1d(coeffs_quad2)
var_quad2 = calculate_variance(year, usmen, coeffs_quad2)
print("The variance for a quadratic fit to data is", var_quad2)
# Use best fit to predict average heights in 1955 and 1995 for both Dutch and U.S.; compute percent error;
# round values to 2 decimal places
#
# Dutch men, 1955: linear prediction first, quadratic prediction second.
# The linear model is evaluated straight from its coefficients (line1) and the
# quadratic model through g1, so the two printed values can never come from
# the same polynomial object -- which is what made them identical before.
print(round(float(np.polyval(line1, 1955)), 2))
print(round(float(g1(1955)), 2))
# TODO: the 1995 predictions, the U.S. predictions and the percent errors
# named in the heading above are not computed yet.