import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.stats as st
from scipy.stats import norm
import math
import statsmodels.api as sm
import pylab
from math import gamma
from scipy.special import gammaln
sns.set()
data = [301.20, 238.82, 252.79, 212.17, 325.43, 245.92, 200.08, 307.88, 193.33, 232.56,
243.39, 162.40, 226.75, 231.37, 208.21, 226.49, 297.49, 252.77, 289.41, 283.34,
265.80, 280.76, 240.61, 287.22, 216.95, 264.74, 232.78, 204.10, 227.01, 231.31]
sns.histplot(data).set_title("Histogram of goat prices")
plt.xlabel("Prices")
def confidence_bounds(alpha, data):
return st.t.interval(alpha=alpha, df=len(data)-1, loc=np.mean(data), scale=st.sem(data))
confidence_bounds(0.95, data)
def critical_values(p, alpha, n):
data = np.array([0, 1, 1, 2, 1, 2, 1, 0, 2, 1])
m = sum(data)/len(data)
v2 = sum((data-m)**2)/len(data)
sqrt = math.sqrt((n*12)*p*(1-p))
Ca = math.ceil(n + sqrt*norm.ppf(1-alpha, 0, 1))
return Ca
critical_values(1/12, 0.05, 10)
from scipy.stats import binom
def power_analysis(n, p):
Ca = critical_values(1/12, 0.05, n)
powerlist = []
for i in p:
powerlist.append(1 - binom.cdf(Ca, n = n*12, p = i))
return powerlist
p_arr = np.linspace(0,1,100)
res1 = power_analysis(10, np.linspace(0,1,100))
plt.plot(p_arr, res1)
res2 = power_analysis(20, np.linspace(0,1,100))
plt.plot(p_arr, res2)
plt.xlabel("p")
x = [288, 351, 332, 268, 289, 319, 300, 298, 295, 287, 284, 297, 302, 294, 284, 299, 298, 350, 308, 284, 295, 307, 338, 311]
y = [1.968, 2.472, 2.286, 1.980, 2.004, 2.290, 2.054, 2.135, 2.125, 2.016, 2.016, 1.998,
2.113, 1.973, 2.004, 2.069, 2.154, 2.388, 2.125, 2.054, 1.963, 2.207, 2.342, 2.168]
plt.plot(x, y, 'o', color = 'black')
plt.title('Scatterplot visitors versus sales')
plt.xlabel('Visitors')
plt.ylabel('Sales (in thousands)')
xy = np.array(x) * np.array(y)
beta_estimator = (np.mean(xy) - np.mean(x) * np.mean(y))/(np.mean(np.square(x)) - np.mean(x) ** 2)
alpha_estimator = np.mean(y) - beta_estimator * np.mean(x)
plt.title('Sales versus Visitors')
plt.xlabel('Visitors')
plt.ylabel('Sales in thousands')
y_hat = alpha_estimator+beta_estimator*np.array(x)
plt.plot(x, y, 'o')
plt.plot(x, y_hat)
residuals = np.array(y) - np.array(y_hat)
plt.plot(y_hat, residuals, 'o', color = 'black')
sm.qqplot(residuals, line ='45', fit = True)
plt.title("Normal QQ-plot")
plt.show()
print((alpha_estimator+beta_estimator*(np.mean(x)*1.2) - np.mean(y)) * 1000)