import numpy as np
import matplotlib.pyplot as plt
def sample(n):
# extra n random 0's or 1's, multiply by 2, then subtract by 1 to get -1 or 1
return 2*np.random.randint(2,size=n)-1
def sample_mean(n_obs,n):
return [np.mean(sample(n)) for _ in range(n_obs)]
data = [sample_mean(1000, n) for n in [10,100,1000,10000]]
# plot histograms for 1b
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(nrows=2,ncols=2)
fig.suptitle('Histograms for Various n')
# give specifics for each subplot
ax1.set_title('10 Samples')
ax1.set_xlim([-1, 1])
ax1.set_xticks([])
ax1.hist(data[0], density=True)
ax2.set_title('100 Samples')
ax2.set_xlim([-1, 1])
ax2.set_xticks([])
ax2.hist(data[1], density=True)
ax3.set_title('1000 Samples')
ax3.set_xlabel('Sample Average')
ax3.set_xlim([-1, 1])
ax3.hist(data[2], density=True)
ax4.set_title('10000 Samples')
ax4.set_xlabel('Sample Average')
ax4.set_xlim([-1, 1])
ax4.hist(data[3], density=True)
plt.show()
data_sqr = np.square(data)
var = [np.mean(n) for n in data_sqr]
logvar = np.log10(var)
#generate plot x values (log10 of 10, 100, 1000, 10000)
xes = [np.log10(x) for x in [10, 100, 1000, 10000]]
plt.plot(xes, logvar, 'o')
plt.xlabel(r'$log_{10} (n)$')
plt.ylabel(r'$log_{10} (var(\bar{x}))$')
plt.show()
xline = np.arange(1, 4.05, 0.05)
fit = -xline
plt.plot(xes, logvar, 'o')
plt.xlabel(r'$log_{10} (n)$')
plt.ylabel(r'$log_{10} (var(\bar{x}))$')
plt.plot(xline, fit)
plt.show()
def probs(data, n_bins):
# use numpy's histrogram function to separate data into finite pieces of x
ns, bins = np.histogram(data,bins=n_bins)
# this line returns the midpoint of each bin to serve as the x values for each plot below
bins = bins[:-1] + 0.5 * (bins[1:]-bins[:-1])
# taking the number of sample means in each bin, dividing by the total number of samples
# and multiplying by bin size, we get the probability density we need
ps = ns/np.sum(ns)*(bins[1]-bins[0])
return bins, ps
xs = np.linspace(-1,1,100)
plt.plot(xs,-xs**2/2)
xs, ps = probs(data[0], 20)
plt.xlabel(r"$\bar{x}_n$")
plt.ylabel(r"$n log(p(\bar{x}_n))$")
# for each value of n, take the logarithm, shift the maximum to 0 and divide by n
plt.plot(xs, (np.log(ps)-np.max(np.log(ps)))/10, "o", label='n=10')
xs, ps = probs(data[1], 20)
plt.plot(xs, (np.log(ps)-np.max(np.log(ps)))/100, "o", label='n=100')
xs, ps = probs(data[2], 20)
plt.plot(xs, (np.log(ps)-np.max(np.log(ps)))/1000, "o", label='n=1000')
xs, ps = probs(data[2], 10)
plt.plot(xs, (np.log(ps)-np.max(np.log(ps)))/10000, "o", label='n=10000')
plt.legend()
plt.show()
/Users/josephkelly/opt/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:8: RuntimeWarning: divide by zero encountered in log
/Users/josephkelly/opt/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:14: RuntimeWarning: divide by zero encountered in log
def make_gaussian(x, mu, sigma_sqr):
prefactor = 1/np.sqrt(2*np.pi*np.sqrt(sigma_sqr))
exponent = -(x - mu)**2 / (2*sigma_sqr)
rho = prefactor * np.exp(exponent)
return rho
x = np.arange(-1, 1.01, 0.005)
curves = [make_gaussian(x, 0, i) for i in [0.1, 0.01, 0.001, 0.0001]]
# initialize 4 subplots
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(nrows=2,ncols=2)
fig.suptitle('Gaussians for 2a')
ax1.set_title('n=10')
ax1.plot(x, curves[0])
ax1.set_xticks([])
ax2.set_title('n=100')
ax2.plot(x, curves[1])
ax2.set_xticks([])
ax3.set_title('n=1000')
ax3.plot(x, curves[2])
ax4.set_title('n=10000')
ax4.plot(x, curves[3])
plt.show()
m = np.linspace(0, 1, 100) #a different way to generate evenly spaced points on an integral
m = m[1:-1] # this removes problem values of 0 and 1 where the logs will calculate infinities
I = -((1-m)*np.log(1-m)+m*np.log(m)+np.log(2))
plt.plot(m, I)
plt.xlabel(r'$m_+$')
plt.ylabel(r'$I~(m_+)$')
plt.show()