# Import libraries
import numpy as np
import matplotlib.pyplot as plt
# Input exact values for comparison
#
dutch_man_1925 = 174.83; us_man_1925 = 174.53
dutch_man_1955 = 180.23; us_man_1955 = 177.22
dutch_man_1995 = 182.54; us_man_1995 = 177.16
#
dutch_woman_1925 = 162.2; us_woman_1925 = 160.97
dutch_woman_1955 = 167.11; us_woman_1955 = 163.54
dutch_woman_1995 = 168.73; us_woman_1995 = 163.56
dutchmen = np.loadtxt('human_heights.txt', skiprows=2, usecols=2)
usmen = np.loadtxt('human_heights.txt', skiprows=2, usecols=4)
print ('Dutch Men Height', dutchmen)
print ('US Men Height', usmen)
Dutch Men Height [155.82 158.31 160.99 163.28 165.2 166.45 167.84 168.85 168.94 168.8 ]
US Men Height [158.74 159.34 160.32 161.69 162.91 163.36 163.76 164.02 163.84 163.65]
# Read in the data and plot; add axes labels, plot title and legend
# Note: to add legend use label option in plt.plot (e.g., label='Dutch Men') and then use
# the command plt.legend ()
#
year = np.loadtxt('human_heights.txt', skiprows=2, usecols=0)
dutchmen = np.loadtxt('human_heights.txt', skiprows=2, usecols=2)
usmen = np.loadtxt('human_heights.txt', skiprows=2, usecols=4)
plt.plot(year, dutchmen, 'bo', label='Dutch Men')
plt.plot(year, usmen, 'ro', label='US Men')
plt.xlabel = "Years"
plt.ylabel = "Height"
plt.title("Dutch Men vs Dutch Men Height")
plt.legend()
# Linear regression fit for both Dutch and U.S.; plot and print out the line
plt.plot(year, dutchmen, 'bo', label='Dutch Men')
plt.plot(year, usmen, 'ro', label='US Men')
plt.xlabel = "Years"
plt.ylabel = "Height"
plt.title("Dutch Men vs Dutch Men Height")
plt.legend()
line1= np.polyfit(year,dutchmen, 1)
f1= np.poly1d(line1)
xx=np.linspace(2000, 1900, 10)
yy=f1(xx)
plt.plot(xx,yy,'b')
plt.plot(year, dutchmen, 'bo', label='Dutch Men')
plt.plot(year, usmen, 'ro', label='US Men')
plt.xlabel = "Years"
plt.ylabel = "Height"
plt.title("Dutch Men vs Dutch Men Height")
plt.legend()
line2= np.polyfit(year,usmen, 1)
f2= np.poly1d(line2)
xx=np.linspace(2000, 1900, 10)
yy=f2(xx)
plt.plot(xx,yy,'r')
plt.show()
# Input our function for calculating the variance
#
# Input: the x and y arrays for the data points, coefficients of line found using LR
#
# Output: variance
def calculate_variance ( x, y,coeff):
n=len(x)
degree = len(coeff) -1
var = 0.0
for i in range(0,n) :
if (degree == 1 ) :
y_line = coeff[0] * x[i] + coeff[1] # calculate value of y on line for given x[i]
else :
y_line = coeff[0] * x[i]*x[i] + coeff[1] *x[i] + coeff[2]
y_data = y[i] # y height of data point at x[i]
distance = y_data-y_line
var = var + distance * distance
var = ( var )/ float(n)
return (var)
#
var = calculate_variance(dutchmen, usmen,line1)
print(' The variance of the linear fit to data is',var)
print(' The standard deviaiton of the linear fit to data is',np.sqrt(var))
def calculate_variance ( x, y,coeff):
n=len(x)
degree = len(coeff) -1
var = 0.0
for i in range(0,n) :
if (degree == 1 ) :
y_line = coeff[0] * x[i] + coeff[1] # calculate value of y on line for given x[i]
else :
y_line = coeff[0] * x[i]*x[i] + coeff[1] *x[i] + coeff[2]
y_data = y[i] # y height of data point at x[i]
distance = y_data-y_line
var = var + distance * distance
var = ( var )/ float(n)
return (var)
#
var = calculate_variance(dutchmen, usmen,line2)
print(' The variance of the linear fit to data is',var)
print(' The standard deviaiton of the linear fit to data is',np.sqrt(var))
The variance of the linear fit to data is 68958.42942586285
The standard deviaiton of the linear fit to data is 262.59937057400356
The variance of the linear fit to data is 11846.535003145753
The standard deviaiton of the linear fit to data is 108.84178886413872
# Quadratic regression fit for Dutch and U.S.; plot and print out the parabolas
parabola_coeff1 = np.polyfit(year,dutchmen,2)
print(f"The equation of the parabola using regression is {parabola_coeff1[0]}x^2 \
+{parabola_coeff1[1]}x + {parabola_coeff1[2]}")
parabola_coeff2 = np.polyfit(year,usmen,2)
print(f"The equation of the parabola using regression is {parabola_coeff2[0]}x^2 \
+{parabola_coeff2[1]}x + {parabola_coeff2[2]}")
plt.plot(year, dutchmen, 'bo', label='Dutch Men')
plt.plot(year, usmen, 'ro', label='US Men')
plt.xlabel = "Years"
plt.ylabel = "Height"
plt.title("Dutch Men vs Dutch Men Height")
plt.legend()
#
# Create data to plot parabola
xx1=np.linspace(2000, 1900, 10)
f1= np.poly1d(parabola_coeff1)
yy1=f1(xx1)
plt.plot(xx1,yy1,'b')
xx2=np.linspace(2000, 1900, 10)
f2= np.poly1d(parabola_coeff2)
yy2=f2(xx2)
plt.plot(xx2,yy2,'r')
plt.show()
The equation of the parabola using regression is -0.0018022727272745678x^2 +7.159604545461751x + -6941.453181825232
The equation of the parabola using regression is -0.0009496212121223673x^2 +3.7551477272772464x + -3548.375106065025
# Calculate variance for the quadratic fits
coeffs_quad1 = np.polyfit (year,dutchmen,2)
print (f"Quadratic polynomial is {coeffs_quad1[2]}*x^2 +{ coeffs_quad1[1]}*x +\
{ coeffs_quad1[0]}")
#
g1 = np.poly1d( coeffs_quad1)
var_quad1 = calculate_variance(year, dutchmen,coeffs_quad1)
print("The variance for a quadratic fit to data is", var_quad1)
coeffs_quad2 = np.polyfit (year,usmen,2)
print (f"Quadratic polynomial is {coeffs_quad2[2]}*x^2 +{ coeffs_quad2[1]}*x +\
{ coeffs_quad2[0]}")
#
g2 = np.poly1d( coeffs_quad2)
var_quad2 = calculate_variance(year, usmen,coeffs_quad2)
print("The variance for a quadratic fit to data is", var_quad2)
Quadratic polynomial is -6941.453181825232*x^2 +7.159604545461751*x +-0.0018022727272745678
The variance for a quadratic fit to data is 0.028612181818147592
Quadratic polynomial is -3548.375106065025*x^2 +3.7551477272772464*x +-0.0009496212121223673
The variance for a quadratic fit to data is 0.07380380303033623
# Use best fit to predict average heights in 1955 and 1995 for both Dutch and U.S.; compute percent error;
# round values to 2 decimal places
#
print (f1(1955))
print (g1(1955))
####my values are coming out the same
167.2422840909112
167.2422840909112