# Import libraries
import numpy as np
# Exact average heights (in centimeters) for comparison with the fitted models.
# One name per (country, sex, year); the values are used as reference points
# when judging predictions made from the regression fits.
#
# The data file itself is read by the sections that need it (with
# skiprows=2); the earlier bare np.loadtxt call here discarded its result
# and has been dropped.
#
# Men
dutch_man_1925 = 174.83
us_man_1925 = 174.53
dutch_man_1955 = 180.23
us_man_1955 = 177.22
dutch_man_1995 = 182.54
us_man_1995 = 177.16
#
# Women
dutch_woman_1925 = 162.2
us_woman_1925 = 160.97
dutch_woman_1955 = 167.11
us_woman_1955 = 163.54
dutch_woman_1995 = 168.73
us_woman_1995 = 163.56
import numpy as np
import matplotlib.pyplot as plt
# Exact average heights for men, given as (Dutch, U.S.) pairs per year.
# These restate the reference values so this section can run on its own.
dutch_man_1925, us_man_1925 = 174.83, 174.53
dutch_man_1955, us_man_1955 = 180.23, 177.22
dutch_man_1995, us_man_1995 = 182.54, 177.16
# Read in the data and plot; add axes labels, plot title and legend.
# The file is parsed once: the first two lines are skipped and columns 0, 1
# and 3 are unpacked into the year, Dutch-men and U.S.-men arrays.
filename = "human_heights.txt"
years, dutch_men, us_men = np.loadtxt(
    filename, skiprows=2, usecols=(0, 1, 3), unpack=True
)
# Red / green circle markers for the two data series.
plt.plot(years, dutch_men, 'ro', label='Dutch Men')
plt.plot(years, us_men, 'go', label='U.S. Men')
plt.xlabel("year")
plt.ylabel("height in centimeters")
plt.title("Average Height of Dutch and US Men")
plt.legend()
plt.show()
# Note: to add legend use label option in plt.plot (e.g., label='Dutch Men') and then use
# the command plt.legend ()
#
# Linear regression fit for both Dutch and U.S.; plot and print out the line.
# The file is parsed once: the first two lines are skipped and columns 0, 1
# and 3 are unpacked into the year, Dutch-men and U.S.-men arrays.
filename = "human_heights.txt"
years, dutch_men, us_men = np.loadtxt(
    filename, skiprows=2, usecols=(0, 1, 3), unpack=True
)
# Degree-1 least-squares fits; polyfit returns [slope, intercept].
dutch_men_coeff = np.polyfit(years, dutch_men, 1)
us_men_coeff = np.polyfit(years, us_men, 1)
# Evaluate each fitted line at the data years so it can be drawn.
dutch_men_fit = np.polyval(dutch_men_coeff, years)
us_men_fit = np.polyval(us_men_coeff, years)
print('Coefficients for Dutch men fit:', dutch_men_coeff)
print('Coefficients for US men fit:', us_men_coeff)
# Data as markers, fitted lines as solid lines.
plt.plot(years, dutch_men, 'o', label='Dutch men')
plt.plot(years, us_men, 'o', label='US men')
plt.plot(years, dutch_men_fit, '-', label='Dutch men fit')
plt.plot(years, us_men_fit, '-', label='US men fit')
plt.xlabel('Year')
plt.ylabel('Height (cm)')
plt.title('Average Height of Dutch and US Men')
plt.legend()
plt.show()
# Output: Coefficients for Dutch men fit: [ 0.14849697 -111.13660606]
# Output: Coefficients for US men fit: [ 0.06795758 43.42751515]
# Calculate the variance for each fit; use the function that we wrote in a previous notebook
# Calculate variances for linear regression models
import numpy as np
import matplotlib.pyplot as plt
def calculate_variance(x, y, coeff):
    """Return the variance of the data about a fitted polynomial.

    The variance is the mean of the squared residuals between the fitted
    polynomial evaluated at ``x`` and the observed ``y`` values. This is the
    same formula this file applies to the quadratic fits.
    NOTE(review): the original function from the earlier notebook is not
    visible here -- confirm it also divides by the number of points.

    Parameters
    ----------
    x : array_like
        x-coordinates of the data points.
    y : array_like
        Observed values; must have the same shape as ``x``.
    coeff : array_like
        Polynomial coefficients, highest power first (as returned by
        ``np.polyfit``). Any degree is accepted.

    Returns
    -------
    float
        Mean squared residual of the fit.

    Raises
    ------
    ValueError
        If ``x`` and ``y`` differ in shape or contain no points.
    """
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    if x.shape != y.shape:
        raise ValueError("x and y must have the same shape")
    if x.size == 0:
        raise ValueError("x and y must contain at least one data point")
    residuals = np.polyval(coeff, x) - y
    return np.mean(residuals ** 2)
# Parse the file once: skip the first two lines and unpack columns 0, 1 and 3
# into the year, Dutch-men and U.S.-men arrays.
filename = "human_heights.txt"
years, dutch_men, us_men = np.loadtxt(
    filename, skiprows=2, usecols=(0, 1, 3), unpack=True
)
# Redo the degree-1 fits here: the names dutch_men_coeff / us_men_coeff are
# also bound to quadratic coefficients further down, so relying on whatever
# they currently hold could report the variance of the wrong model.
dutch_men_coeff = np.polyfit(years, dutch_men, 1)
us_men_coeff = np.polyfit(years, us_men, 1)
dutch_men_var = calculate_variance(years, dutch_men, dutch_men_coeff)
us_men_var = calculate_variance(years, us_men, us_men_coeff)
print('Variance for Dutch men fit:', dutch_men_var)
print('Variance for US men fit:', us_men_var)
# Input: the x and y arrays for the data points, coefficients of line found using LR
#
# Output: variance
#
# Output: Variance for Dutch men fit: 0.7567162424242179
# Output: Variance for US men fit: 0.7133635151515145
# Quadratic regression fit for Dutch and U.S. ; plot and print out parabolas
import numpy as np
import matplotlib.pyplot as plt
# Parse the file once: skip the first two lines and unpack columns 0, 1 and 3
# into the year, Dutch-men and U.S.-men arrays.
filename = "human_heights.txt"
years, dutch_men, us_men = np.loadtxt(
    filename, skiprows=2, usecols=(0, 1, 3), unpack=True
)
# Degree-2 least-squares fits; coefficients are ordered highest power first.
dutch_men_coeff = np.polyfit(years, dutch_men, 2)
us_men_coeff = np.polyfit(years, us_men, 2)
# Evaluate each fitted parabola at the data years so it can be drawn.
dutch_men_fit = np.polyval(dutch_men_coeff, years)
us_men_fit = np.polyval(us_men_coeff, years)
# Print the parabolas, mirroring what is printed for the linear fits.
print('Coefficients for Dutch men quadratic fit:', dutch_men_coeff)
print('Coefficients for US men quadratic fit:', us_men_coeff)
# Raw data as markers (as in the earlier plots) so the measurements are not
# joined by line segments; fitted parabolas as dash-dot curves.
plt.plot(years, dutch_men, 'o', label='Dutch men')
plt.plot(years, us_men, 'o', label='US men')
plt.plot(years, dutch_men_fit, '-.', label='Dutch men quadratic fit')
plt.plot(years, us_men_fit, '-.', label='US men quadratic fit')
plt.xlabel('Year')
plt.ylabel('Height (cm)')
plt.title('Average Height of Dutch and US Men')
plt.legend()
plt.show()
# Calculate variance for the quadratic fits
import numpy as np
import matplotlib.pyplot as plt
# load data from file in one pass: skip the first two lines and unpack
# columns 0, 1 and 3 into the year, Dutch-men and U.S.-men arrays
filename = "human_heights.txt"
years, dutch_men, us_men = np.loadtxt(
    filename, skiprows=2, usecols=(0, 1, 3), unpack=True
)
# fit quadratic regression curves (coefficients ordered highest power first)
dutch_men_coeff = np.polyfit(years, dutch_men, 2)
us_men_coeff = np.polyfit(years, us_men, 2)
# calculate predicted values of y for each x; np.polyval evaluates
# c[0]*x**2 + c[1]*x + c[2] and agrees with the expanded form up to
# floating-point rounding
dutch_men_pred = np.polyval(dutch_men_coeff, years)
us_men_pred = np.polyval(us_men_coeff, years)
# calculate variances as the mean squared residual of each fit
dutch_men_var = np.mean((dutch_men_pred - dutch_men)**2)
us_men_var = np.mean((us_men_pred - us_men)**2)
print("Variance of Dutch men's height data:", dutch_men_var)
print("Variance of US men's height data:", us_men_var)
# Output: Variance of Dutch men's height data: 0.14137533333338406
# Output: Variance of US men's height data: 0.036083212121067784
# Use best fit to predict average heights in 1955 and 1995 for both Dutch and U.S.; compute percent error;
# round values to 2 decimal places
#
import numpy as np
import matplotlib.pyplot as plt
# load data from file in one pass: skip the first two lines and unpack
# columns 0, 1 and 3 into the year, Dutch-men and U.S.-men arrays
filename = "human_heights.txt"
years, dutch_men, us_men = np.loadtxt(
    filename, skiprows=2, usecols=(0, 1, 3), unpack=True
)
# Fit the quadratic (best-fit) model here rather than relying on whatever
# dutch_men_coeff / us_men_coeff currently hold: the same names are also used
# for the linear fits, which have only two coefficients.
dutch_men_coeff = np.polyfit(years, dutch_men, 2)
us_men_coeff = np.polyfit(years, us_men, 2)
# Predict average heights in 1955 and 1995 for Dutch men
dutch_men_height_1955 = np.polyval(dutch_men_coeff, 1955)
dutch_men_height_1995 = np.polyval(dutch_men_coeff, 1995)
# Predict average heights in 1955 and 1995 for US men
us_men_height_1955 = np.polyval(us_men_coeff, 1955)
us_men_height_1995 = np.polyval(us_men_coeff, 1995)
# Calculate percent errors against the exact values entered for comparison
# (dutch_man_1955, dutch_man_1995, us_man_1955, us_man_1995)
dutch_men_1955_error = abs((dutch_men_height_1955 - dutch_man_1955) / dutch_man_1955) * 100
dutch_men_1995_error = abs((dutch_men_height_1995 - dutch_man_1995) / dutch_man_1995) * 100
us_men_1955_error = abs((us_men_height_1955 - us_man_1955) / us_man_1955) * 100
us_men_1995_error = abs((us_men_height_1995 - us_man_1995) / us_man_1995) * 100
# Round results to 2 decimal places
dutch_men_height_1955 = round(dutch_men_height_1955, 2)
dutch_men_height_1995 = round(dutch_men_height_1995, 2)
us_men_height_1955 = round(us_men_height_1955, 2)
us_men_height_1995 = round(us_men_height_1995, 2)
dutch_men_1955_error = round(dutch_men_1955_error, 2)
dutch_men_1995_error = round(dutch_men_1995_error, 2)
us_men_1955_error = round(us_men_1955_error, 2)
us_men_1995_error = round(us_men_1995_error, 2)
# Print results
print("Predicted average height for Dutch men in 1955: ", dutch_men_height_1955, " cm")
print("Predicted average height for Dutch men in 1995: ", dutch_men_height_1995, " cm")
print("Predicted average height for US men in 1955: ", us_men_height_1955, " cm")
print("Predicted average height for US men in 1995: ", us_men_height_1995, " cm")
print("Percent error in predicting average height for Dutch men in 1955: ", dutch_men_1955_error, " %")
print("Percent error in predicting average height for Dutch men in 1995: ", dutch_men_1995_error, " %")
print("Percent error in predicting average height for US men in 1955: ", us_men_1955_error, " %")
print("Percent error in predicting average height for US men in 1995: ", us_men_1995_error, " %")
# Expected output. The predictions are those recorded from the quadratic fit;
# the percent errors are re-derived from those predictions and the exact
# values above -- re-run to confirm.
# Predicted average height for Dutch men in 1955: 179.96 cm
# Predicted average height for Dutch men in 1995: 183.31 cm
# Predicted average height for US men in 1955: 177.11 cm
# Predicted average height for US men in 1995: 177.11 cm
# Percent error in predicting average height for Dutch men in 1955: 0.15 %
# Percent error in predicting average height for Dutch men in 1995: 0.42 %
# Percent error in predicting average height for US men in 1955: 0.06 %
# Percent error in predicting average height for US men in 1995: 0.03 %
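# Conclusion: the quadratic fit is the better model because its variance is
# smaller than that of the linear fit for both data sets (Dutch men: about
# 0.141 vs. 0.757; U.S. men: about 0.036 vs. 0.713).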