library("dplyr")
# H0: there is no difference in means of number of victims sorted by offender race
# H1: there is a difference in means of number of victims sorted by offender race
### Determine Independence
# These observations were taken independently, we can assume they are independent.
### Test for Equality of Variance
library(car)
leveneTest(TOTAL_INDIVIDUAL_VICTIMS ~ OFFENDER_RACE, data = crime)
# we can see a P-value of 0.387. This is greater than the alpha value of 0.05,
# we can assume that the variances are equal.
anova.means <- tapply(crime$TOTAL_INDIVIDUAL_VICTIMS, INDEX = crime$OFFENSE_NAME, FUN = mean)
victim.meancenter <- crime$TOTAL_INDIVIDUAL_VICTIMS - anova.means[as.factor(crime$OFFENSE_NAME)]
#create a QQ Plot
qqnorm(victim.meancenter)
qqline(victim.meancenter)
# This data does not appear to be normally distributed, instead it seems strange
# The flat distribution with spikes at the end probably mean our data have more
# extreme values than a regular normal distribution
summary(aov(TOTAL_INDIVIDUAL_VICTIMS ~ OFFENDER_ETHNICITY, data = crime))
# There are likely variations in offender race mean victim counts
# our P-vaue is very close to zero.