# Question 4

### c)

set.seed(0)
num_cluster1 = 0
num_cluster2 = 0
n = 5000
ytotal <- c("list", n)
for(i in 1:n){
yi_total = round(runif(1))
if(yi_total == 0){
yi_total = 2 * (1 + 2)
num_cluster1 = num_cluster1 + 1
} else {
yi_total = 2 * (3 + 4 + 5 + 6)
num_cluster2 = num_cluster2 + 1
}
ytotal[i] = yi_total
}
cat("Number of times cluster 1 was picked:",num_cluster1,"\n")
cat("The value of Ytotal if cluster 1 is picked is 6\n")
cat("Number of times cluster 2 was picked:",num_cluster2,"\n")
cat("The value of Ytotal if cluster 2 is picked is 36\n")
cat("Variance of the column:",var(ytotal))
cat("Since the column vector is \nonly comprised of values 6 and 36, \nthis verifies 4a),\n")
cat("Since the variance is almost 225, this verfifies 4b)")

```
Number of times cluster 1 was picked: 2565
The value of Ytotal if cluster 1 is picked is 6
Number of times cluster 2 was picked: 2435
The value of Ytotal if cluster 2 is picked is 36
Variance of the column: 224.8929Since the column vector is
only comprised of values 6 and 36,
this verifies 4a),
Since the variance is almost 225, this verfifies 4b)
```

### d)

big_s = c("list",n)
for(i in 1:n){
s = sample(1:6, 3)
ytotal = 0
for(j in 1:3){
ytotal = ytotal + s[j]
}
big_s[i] = ytotal
}
cat("Since the column vector is \nonly comprised of values 6 to 15, \nthis verifies a),\n")
cat("Variance of the column:",var(big_s),"\n")
cat("Since the variance is almost 14/3, this verfifies b)")

```
Since the column vector is
only comprised of values 6 to 15,
this verifies a),
Variance of the column: 5.400855
Since the variance is almost 14/3, this verfifies b)
```

# Question 5

### a)

Est = function(point.est, se.est, digits=2){
round(c(point.est, point.est - 1.96*se.est,point.est + 1.96*se.est), digits)
}
ssc = read.csv("ssc.csv",header=TRUE, sep = ",")
ssc$gender [ssc$gender == "f"] <- 0
ssc$gender [ssc$gender == "m"] <- 1
ssc$gender = as.integer(ssc$gender)
standard_error = function(x) sd(x) / sqrt(length(x))
females_percent = table(ssc$gender)[1] / nrow(ssc)
cat("Estimated percentage of females in sample:",females_percent * 100,"%\n")
CI = Est(females_percent, standard_error(ssc$gender))
cat("CI: (",CI[2],",",CI[3],")\n")

```
Estimated percentage of females in sample: 30.66667 %
CI: ( 0.23 , 0.38 )
```

### b)

cat("Estimated total amount of females in SSC:",CI[1] * 864,"\n")
cat("CI: (",CI[2] * 864,",",CI[3] * 864,")")

```
Estimated total amount of females in SSC: 267.84
CI: ( 198.72 , 328.32 )
```

# Question 6

### a)

Var = function(x){
if(length(x)==1) return(0)
return(var(x))
}
radon = read.csv("radon.csv",header=TRUE, sep = ",")
nh = by(radon[,"radon"], radon[,"countyname"],length)
n = length(radon[,"radon"])
omegah = nh / n
Ybarh = by(radon[,"radon"], radon[,"countyname"], mean)
Ystr = sum(omegah * Ybarh)
cat("Estimate of average radon level:",Ystr,"\n")
CI = Est(Ystr, standard_error(radon[,"radon"]))
cat("CI: (",CI[2],",",CI[3],")\n")
radon$logradon = log(radon$radon)
nh = by(radon[,"logradon"], radon[,"countyname"],length)
n = length(radon[,"logradon"])
omegah = nh / n
Ybarh = by(radon[,"logradon"], radon[,"countyname"], mean)
Ystr = sum(omegah * Ybarh)
cat("Estimate of average logradon level:",Ystr,"\n")
CI = Est(Ystr, standard_error(radon[,"logradon"]))
cat("CI: (",CI[2],",",CI[3],")\n")

```
Estimate of average radon level: 4.75663
CI: ( 4.48 , 5.03 )
Estimate of average logradon level: 1.23307
CI: ( 1.18 , 1.28 )
```

### b)

radon_greater_4 = length(radon[,"radon"][(radon[,"radon"])>4])
cat("Estimate of number of homes with radon level greater than 4:",radon_greater_4,"\n")
CI = Est(radon_greater_4, standard_error(radon[,"radon"]))
cat("CI: (",CI[2],",",CI[3],")\n")

```
Estimate of number of homes with radon level greater than 4: 443
CI: ( 442.72 , 443.28 )
```