# Import libraries
import numpy as np
import matplotlib.pyplot as plt

# Exact average heights (cm) that the fitted models' predictions are
# compared against, one Dutch/U.S. pair per year.
#
# Men
dutch_man_1925, us_man_1925 = 174.83, 174.53
dutch_man_1955, us_man_1955 = 180.23, 177.22
dutch_man_1995, us_man_1995 = 182.54, 177.16
#
# Women
dutch_woman_1925, us_woman_1925 = 162.2, 160.97
dutch_woman_1955, us_woman_1955 = 167.11, 163.54
dutch_woman_1995, us_woman_1995 = 168.73, 163.56

# Read in the data and plot; add axes labels, plot title and legend.
# The file is parsed once: columns 0, 1 and 3 are unpacked into the year,
# Dutch-men and U.S.-men arrays, and the first two lines of the file are
# skipped.
# Note: plt.legend() builds the legend from the label= option given to each
# plt.plot call (e.g., label='Dutch Men').
#
year, dutchmen, usmen = np.loadtxt(
    'human_heights.txt', usecols=(0, 1, 3), skiprows=2, unpack=True
)
plt.plot(year, dutchmen, 'bo', label='Dutch Men')
plt.plot(year, usmen, 'ro', label='American Men')
plt.xlabel("Year")
plt.ylabel("Heights in cm")
plt.title("Yearly Dutch and American Male Heights")
plt.legend()
plt.show()

# Linear regression fit for both Dutch and U.S.; plot and print out the line.
# np.polyfit returns the coefficients highest power first, so for degree 1
# index 0 is the slope and index 1 is the intercept.
Dutchline_coeff = np.polyfit(year, dutchmen, 1)
USline_coeff = np.polyfit(year, usmen, 1)
f = np.poly1d(Dutchline_coeff)
g = np.poly1d(USline_coeff)

# Years at which the fitted lines are evaluated for plotting
yeareval = [1900, 1925, 1955, 1985]
Dutcheval = f(yeareval)
USeval = g(yeareval)

# The ':+' format always writes the intercept's own sign, so the equation
# reads correctly whether the intercept is positive or negative.
print("The equation of the Dutch line using linear regression is "
      f"{Dutchline_coeff[0]}x {Dutchline_coeff[1]:+}")
print("The equation of the US line using linear regression is "
      f"{USline_coeff[0]}x {USline_coeff[1]:+}")

# Data points as markers, fitted lines as solid lines in the matching colour
plt.plot(year, dutchmen, 'bo', label='Dutch Men')
plt.plot(yeareval, Dutcheval, '-b')
plt.plot(year, usmen, 'ro', label='American Men')
plt.plot(yeareval, USeval, 'r-')
plt.xlabel("Year")
plt.ylabel("Heights in cm")
plt.title("Yearly Dutch and American Male Heights")
plt.legend()
plt.show()

```
The equation of the Dutch line using linear regression is 0.14849696969696968x -111.136606060606
The equation of the US line using linear regression is 0.06795757575757588x +43.42751515151488
```

# Calculate the variance for each fit; use the function that we wrote in a previous notebook
# Input our function for calculating the variance
def calculate_variance(x, y, coeff):
    """Return the mean squared residual of a polynomial fit.

    Parameters
    ----------
    x, y : sequence of float
        Coordinates of the data points; both must have the same length.
    coeff : sequence of float
        Polynomial coefficients, highest power first (the order returned
        by ``np.polyfit``).  A polynomial of any degree is accepted.

    Returns
    -------
    float
        Sum of the squared vertical distances between each data point and
        the polynomial, divided by the number of data points.

    Raises
    ------
    ValueError
        If ``x`` and ``y`` do not have the same shape.
    ZeroDivisionError
        If no data points are given.
    """
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    if x.shape != y.shape:
        # Guard against silent broadcasting of mismatched inputs.
        raise ValueError("x and y must contain the same number of points")

    n = x.size
    # np.polyval evaluates the polynomial for every x at once and works for
    # any number of coefficients, not only lines and parabolas.
    residuals = y - np.polyval(coeff, x)
    return float(np.sum(residuals * residuals)) / float(n)
#
# Variance of each linear fit: pass the data points together with the
# coefficients found by the linear regression, then report the result.
#
Dutchvarlinear = calculate_variance(x=year, y=dutchmen, coeff=Dutchline_coeff)
print("The variance for a linear fit to the Dutch men data is", Dutchvarlinear)

USvarlinear = calculate_variance(x=year, y=usmen, coeff=USline_coeff)
print("The variance for a linear fit to the US men data is", USvarlinear)

```
The variance for a linear fit to the Dutch men data is 0.7567162424242179
The variance for a linear fit to the US men data is 0.7133635151515145
```

# Quadratic regression fit for Dutch and U.S.; plot and print out the parabolas.
# np.polyfit returns the coefficients highest power first, so for degree 2
# index 0 multiplies x^2, index 1 multiplies x and index 2 is the constant.
Dutchquad_coeff = np.polyfit(year, dutchmen, 2)
USquad_coeff = np.polyfit(year, usmen, 2)
y = np.poly1d(Dutchquad_coeff)
z = np.poly1d(USquad_coeff)
Dutchquadeval = y(yeareval)
USquadeval = z(yeareval)

# The ':+' format writes each lower-order coefficient with its own sign.
print("The equation for Dutch men using quadratic regression is "
      f"{Dutchquad_coeff[0]}*x^2 {Dutchquad_coeff[1]:+}*x {Dutchquad_coeff[2]:+}")
print("The equation for US men using quadratic regression is "
      f"{USquad_coeff[0]}*x^2 {USquad_coeff[1]:+}*x {USquad_coeff[2]:+}")

# Evaluate the parabolas on a dense grid over the same year range so they are
# drawn as smooth curves instead of straight segments between four points.
yearcurve = np.linspace(min(yeareval), max(yeareval), 200)
plt.plot(year, dutchmen, 'bo', label='Dutch Men')
plt.plot(yearcurve, y(yearcurve), '-b')
plt.plot(year, usmen, 'ro', label='American Men')
plt.plot(yearcurve, z(yearcurve), 'r-')
plt.xlabel("Year")
plt.ylabel("Heights in cm")
plt.title("Yearly Dutch and American Male Heights")
plt.legend()
plt.show()

```
The equation for Dutch men using quadratic regression is -4194.193424248043*x^2 + 4.347928787884533*x + -0.0010795454545469232
The equation for US men using quadratic regression is -4240.200515157401*x^2 + 4.473677272733291*x + -0.0011325757575772958
```

# Variance of each quadratic fit: same calculation as for the lines, but
# with the three coefficients of the fitted parabola.
Dutchvarquad = calculate_variance(x=year, y=dutchmen, coeff=Dutchquad_coeff)
print("The variance for a quadratic fit to the Dutch men data is", Dutchvarquad)

USvarquad = calculate_variance(x=year, y=usmen, coeff=USquad_coeff)
print("The variance for a quadratic fit to the US men data is", USvarquad)

```
The variance for a quadratic fit to the Dutch men data is 0.14137533333316604
The variance for a quadratic fit to the US men data is 0.03608321212110939
```

# Use best fit to predict average heights in 1955 and 1995 for both Dutch and U.S.; compute percent error;
# round values to 2 decimal places
#
# y and z are the quadratic fits for Dutch and U.S. men respectively.
Dutchpredictedquad1955 = y(1955)
Dutchpredictedquad1995 = y(1995)
USpredictedquad1955 = z(1955)
USpredictedquad1995 = z(1995)

# Relative error |predicted - exact| / exact, stored as a fraction; it is
# converted to a percentage only when printed.
Dutch1955se = abs(Dutchpredictedquad1955 - dutch_man_1955) / dutch_man_1955
Dutch1995se = abs(Dutchpredictedquad1995 - dutch_man_1995) / dutch_man_1995
us1955se = abs(USpredictedquad1955 - us_man_1955) / us_man_1955
us1995se = abs(USpredictedquad1995 - us_man_1995) / us_man_1995

# ':.2f' rounds the heights and ':.2%' prints the error as a percentage,
# both to 2 decimal places.
print(f"The predicted heights for Dutch men in 1955 is {Dutchpredictedquad1955:.2f} "
      f"and in 1995 is {Dutchpredictedquad1995:.2f}")
print(f"The predicted heights for American men in 1955 is {USpredictedquad1955:.2f} "
      f"and in 1995 is {USpredictedquad1995:.2f}")
print(f"The percent error for 1955 is {Dutch1955se:.2%} for Dutch men "
      f"and {us1955se:.2%} for American men")
print(f"The percent error for 1995 is {Dutch1995se:.2%} for Dutch men "
      f"and {us1995se:.2%} for American men")

```
The predicted heights for Dutch men in 1955 is 179.95764015151508 and in 1995 is 183.30660984848237
The predicted heights for American men in 1955 is 177.10569318181933 and in 1995 is 177.1058143939381
The Standard Error for 1955 is 0.001511179318009824 for Dutch men and 0.001511179318009824 for American men
The Standard Error for 1955 is 0.0003058568867797856 for Dutch men and 0.0003058568867797856 for American men
```