Discrete Random Variable
# Probability distribution of the number of heads in three coin flips.
# N_Heads holds the possible head counts; C_Heads holds how many outcomes
# produce each count.
coin3 <- data.frame(
  N_Heads = c(0, 1, 2, 3),
  C_Heads = c(1, 3, 3, 1)
)
# Turn counts into probabilities. Columns are read through the data frame
# instead of attach(): an attached copy is a snapshot taken before P_Heads
# exists and would linger on the search path.
coin3$P_Heads <- coin3$C_Heads / sum(coin3$C_Heads)
# Plot size used when the figure is rendered (repr.plot.* options).
options(repr.plot.width = 7, repr.plot.height = 4)
barplot(
  coin3$P_Heads,
  names.arg = coin3$N_Heads,
  main = "Discrete Probability Distribution of flipping three coins"
)
# Optional for advanced learners
# Enumerate all 2^3 outcomes of three tosses and count the heads in each one,
# so the bars are labelled with head counts (0-3), matching the x-axis title,
# rather than with outcome strings such as "HHT".
tosses <- expand.grid(rep(list(c("H", "T")), 3), stringsAsFactors = FALSE)
heads_per_outcome <- rowSums(tosses == "H")
barplot(
  prop.table(table(heads_per_outcome)),
  main = "Discrete Probability Distribution of tossing three coins",
  xlab = "x = Number of Heads", ylab = "P(x)", col = "#6633FF"
)
Expected Value
# Expected number of heads: the sum of x * P(x) over every head count.
# with() scopes the column lookup to coin3, so nothing has to be attach()-ed
# to the search path.
E_X <- with(coin3, sum(N_Heads * P_Heads))
E_X
Variance
# Revenue-per-passenger distribution: for each number of bags (N_Bags), X is
# the amount collected and P_X the probability of that case.
fees <- data.frame(
  N_Bags = c(0, 1, 2),
  X = c(0, 25, 60),
  P_X = c(0.54, 0.34, 0.12)
)
fees
# Columns are read as fees$... rather than via attach(), which would leave a
# copy of the data frame on the search path for the rest of the session.
# Expected value: sum of x * P(x).
E_X <- sum(fees$X * fees$P_X)
print(paste("The expected revenue per passenger is $", E_X))
# Variance: probability-weighted squared deviation from the expected value.
# NOTE(review): the variable `var` has the same name as stats::var(); the
# name is kept so any code that reads `var` afterwards keeps working.
var <- sum((fees$X - E_X)^2 * fees$P_X)
print(paste("The variance of the revenue per passenger is ", var))
print(paste("The standard deviation of the revenue per passenger is $", round(sqrt(var),2)))
Binomial Distribution
# Values the variable can take: the integers 0 through 50 in steps of 1
# (51 values in total).
x <- 0:50
# Binomial probability of each value for 50 trials with success chance 0.5.
y <- dbinom(x, size = 50, prob = 0.5)
# Type "h" draws one vertical spike per value.
plot(x, y, type = "h")
Continuous Random Variable
# Draw 10,000 values uniformly between -2 and 1 and show them as a density
# (not count) histogram.
Uniform_Random_Number <- runif(n = 10000, min = -2, max = 1)
hist(Uniform_Random_Number, freq = FALSE)
# Optional for advanced learners
# Endpoints of the uniform interval. The sample, axis limits, title and
# density curve below are all derived from these two values so they cannot
# drift apart.
a <- -2
b <- 1
rand.unif <- runif(10000, min = a, max = b)
# Density histogram with shading lines; the y-range leaves 5% headroom above
# the theoretical density 1 / (b - a).
hist(rand.unif, freq = FALSE, xlab = "x",
     ylim = c(0, 1.05 / (b - a)), xlim = c(a - 0.2, b + 0.2),
     density = 5,
     main = sprintf("Uniform distribution for the interval [%g,%g]", a, b))
# Overlay the theoretical density, extending one unit past each endpoint so
# the drop to zero is visible. Axis arguments are omitted because curve()
# with add = TRUE only draws a line on the existing plot.
curve(dunif(x, min = a, max = b), from = a - 1, to = b + 1, n = 100000,
      col = "darkblue", lwd = 5, add = TRUE)
Normal Distribution
# Demonstration of Normal Distribution plotting
options(repr.plot.width = 7, repr.plot.height = 4)
# 200 evenly spaced points from -4 to 4.
x <- seq(-4, 4, length.out = 200)
# Standard normal density, i.e. 1 / sqrt(2 * pi) * exp(-x^2 / 2), taken from
# dnorm() rather than typed out by hand.
y <- dnorm(x)
plot(x, y, type = "l", lwd = 2, col = "red", main = "Normal Distribution")
# Optional for advanced learners
# Binomial(n, p) probabilities drawn as spikes, with the normal curve that
# has the same mean and standard deviation drawn on top. Everything is
# derived from n and p, so changing either keeps the plot consistent.
options(repr.plot.width = 7, repr.plot.height = 4)
n <- 10
p <- 0.5
x <- 0:n
mu <- n * p
s <- sqrt(n * p * (1 - p))
y <- dbinom(x, size = n, prob = p)
plot(x, y, type = "h", lwd = 2, col = "red")
# Fine grid over the same range for a smooth normal curve.
xx <- seq(0, n, length.out = 200)
yy <- dnorm(xx, mean = mu, sd = s)
lines(xx, yy, lwd = 2, col = "blue")
Interactive Normal Distribution
Normal Approximation
#' Plot a density histogram of `x` with a fitted normal curve on top.
#'
#' The curve uses the sample mean and standard deviation of `x` and is drawn
#' over mean +/- 6 standard deviations, which is also the x-axis range.
#'
#' @param x Numeric vector of observations.
#' @param b Passed to `hist()` as `breaks`.
#' @return The histogram object, invisibly; the function is called for its
#'   plot.
normal_dist <- function(x, b) {
  stopifnot(is.numeric(x))
  mu <- mean(x)
  sigma <- sd(x)
  grid <- seq(mu - 6 * sigma, mu + 6 * sigma, length.out = 100)
  # Normal curve
  curve_density <- dnorm(grid, mean = mu, sd = sigma)
  # Bin first without drawing, so the y-axis can be sized to fit both the
  # tallest bar and the curve peak; sizing it from the curve alone cuts off
  # bars that rise above the curve.
  bins <- hist(x, breaks = b, plot = FALSE)
  # Histogram
  # Plot size used when the figure is rendered (repr.plot.* options); this
  # is set globally and not restored.
  options(repr.plot.width = 7, repr.plot.height = 4)
  plot(bins, freq = FALSE, col = "white",
       xlim = c(mu - 6 * sigma, mu + 6 * sigma),
       ylim = c(0, max(curve_density, bins$density)),
       main = "Histogram overlayed with normal curve",
       xlab = "x")
  lines(grid, curve_density, col = 2, lwd = 2)
  invisible(bins)
}
# Load the chickwts data set and attach it, which makes its columns
# reachable by bare name on the search path. Then compare the weight column
# with a fitted normal curve, passing 15 as the histogram breaks.
data("chickwts")
attach(chickwts)
normal_dist(x = chickwts$weight, b = 15)
Test of Normality
# Shapiro-Wilk test of the weights; run once and reused below.
shapiro_result <- shapiro.test(chickwts$weight)
shapiro_result
# Is the approximation normal?
# The conclusion depends on how the p-value compares with a significance
# level, so the message is chosen by that comparison instead of always
# claiming normality.
alpha <- 0.05
p_value <- shapiro_result$p.value
if (p_value > alpha) {
  cat("The data can be approximated as normal distribution since p =",
      round(p_value, 2), ">", alpha, "\n")
} else {
  cat("The data cannot be approximated as normal distribution since p =",
      round(p_value, 2), "<=", alpha, "\n")
}
Z Score
# Standardize each weight: subtract the mean weight and divide by the
# standard deviation. with() looks `weight` up inside chickwts itself, so
# the line does not depend on the data frame having been attach()-ed.
chickwts$zscore <- with(chickwts, (weight - mean(weight)) / sd(weight))
tail(chickwts)
Problems from the Book
# b. Z-scores for the two sections, each written as
# (value - centre) / spread.
z_sophiaV <- (160 - 151) / 7
z_sophiaQ <- (157 - 153) / 7.67
# The trailing "\n" keeps later output from running onto this line.
cat("Z Score for Verbal= ", round(z_sophiaV,2), "\nZ Score for Quantitative Reasoning= ", round(z_sophiaQ, 2), "\n")
# e. Percentiles: the lower-tail normal probability at each z-score. The
# z-scores are rounded to two decimals first, which reproduces the values
# 1.29 and 0.52 that were previously typed in by hand.
pct_verbal <- round(pnorm(round(z_sophiaV, 2), lower.tail = TRUE), 4) * 100
pct_quant <- round(pnorm(round(z_sophiaQ, 2), lower.tail = TRUE), 4) * 100
cat("The verbal ability percentile score is ", pct_verbal, "%\n")
cat("The quantitative reasoning percentile score is ", pct_quant, "%\n")
# f. Share scoring higher: the complement of each percentile.
cat(100 - pct_verbal, "%", "did better than Sophia in verbal ability\n")
cat(100 - pct_quant, "%", "did better than Sophia in quantitative reasoning\n")