# Run this code to load the required packages
suppressMessages(suppressWarnings(suppressPackageStartupMessages({
library(mosaic)
library(supernova)
library(Lock5withR)
})))
# Shrink the default size of plots rendered in the notebook
options(
  repr.plot.width = 6,
  repr.plot.height = 4
)

# Read the published Google Sheets CSV export into a data frame,
# treating the first row as column names
CensusSchool <- read.csv(
  file = "https://docs.google.com/spreadsheets/d/e/2PACX-1vSVaWnM4odSxy0mlnhWvvGbeLtiKoZmsbqC6KLzXtBOjQfrF9EVKuX4RVh3XbP3iw/pub?gid=2100178416&single=true&output=csv",
  header = TRUE
)

# Print the column names, types, and first few values
str(CensusSchool)
'data.frame': 10113 obs. of 60 variables:
$ Country : chr "USA" "USA" "USA" "USA" ...
$ Region : chr "SD" "PA" "CO" "MN" ...
$ DataYear : int 2011 2020 2015 2018 2014 2011 2011 2011 2021 2011 ...
$ ClassGrade : int 9 9 9 9 9 9 9 9 9 9 ...
$ Gender : chr "Female" "Female" "Male" "Female" ...
$ Ageyears : num 16 14 15 14 16 14 15 14 14 14 ...
$ Handed : chr "Right-Handed" "Left-Handed" "Right-Handed" "Right-Handed" ...
$ Height_cm : num 61 170 68 16256 385 ...
$ Footlength_cm : num 19 30 24 8 22 800 22 25 25 23 ...
$ Armspan_cm : num 157 67 164 NA 147 168 150 166 172 148 ...
$ Languages_spoken : num 0 1 1 1 1 1 1 1 1 2 ...
$ Travel_to_School : chr "Bus" "Bus" "Car" "Bus" ...
$ Travel_time_to_School : num 45 45 NA 20 12 20 15 10 6 20 ...
$ Reaction_time : num 0.422 0.31 0.34 0.491 0.433 0.297 0.375 61 0.721 0.469 ...
$ Score_in_memory_game : num 44 47 41 46 48 35 37 60 44 41 ...
$ Favourite_physical_activity : chr "Swimming" "Basketball" "Baseball/Softball" "Dancing" ...
$ Importance_reducing_pollution : num 5 1000 1000 1000 900 350 25 0 5 NA ...
$ Importance_recycling_rubbish : num 400 900 1000 1000 0 350 15 0 21 1050 ...
$ Importance_conserving_water : num 600 900 1000 0 800 250 50 0 102 1000 ...
$ Importance_saving_energy : num NA 900 500 1000 1000 NA NA NA 1 NA ...
$ Importance_owning_computer : num 0 0 0 0 1000 0 0 0 5 NA ...
$ Importance_Internet_access : num 0 0 0 0 0 0 0 0 0 0 ...
$ Left_Footlength_cm : num 18 30 24 9 21 800 21 26 24 23 ...
$ Longer_foot : chr "Right foot" "Same length" "Same length" "Same length" ...
$ Index_Fingerlength_mm : num 10 8 90 3 60 3000 6 8 7 8 ...
$ Ring_Fingerlength_mm : num 6 7 95 5 60 3500 6 10 6.5 7 ...
$ Longer_Finger_Lefthand : chr "Index finger" "Index finger" "Index finger" "Ring finger" ...
$ Birth_month : chr "October" "June" "August" "April" ...
$ Favorite_Season : chr "Spring" "Summer" "Summer" "Fall" ...
$ Allergies : chr "Yes" "Yes" "No" "Yes" ...
$ Vegetarian : chr "No" "No" "No" "No" ...
$ Favorite_Food : chr "Pizza/Pasta" "Desserts" "Meat" "No favorite" ...
$ Beverage : chr "Water" "Water" "Water" "Water" ...
$ Favorite_School_Subject : chr "History" "Other" "Mathematics and statistics" "History" ...
$ Sleep_Hours_Schoolnight : num 8 8 8 8 9 7 8 9 8 5 ...
$ Sleep_Hours_Non_Schoolnight : num 8 9 8 7 8 9 11 7 NA 13 ...
$ Home_Occupants : num 5 4 5 6 4 5 2 4 8 4 ...
$ Home_Internet_Access : chr "No internet connection" "Yes - broadband connection" "Yes - other" "Yes - other" ...
$ Communication_With_Friends : chr "In person" "Text messaging" "Text messaging" "Internet chat or instant messaging" ...
$ Text_Messages_Sent_Yesterday : num 0 10 3 8 200 500 12 250 1 249 ...
$ Text_Messages_Received_Yesterday: num 0 10 4 3 500 500 10 300 0 305 ...
$ Hanging_Out_With_Friends_Hours : num 2 NA 3 5 0 10 14 20 12 3 ...
$ Talking_On_Phone_Hours : num 0 1 2 7 10 0 0 15 3 1 ...
$ Doing_Homework_Hours : num 3 10 3 3 100 2 28 10 6 1 ...
$ Doing_Things_With_Family_Hours : num 1 4 15 7 10 9 28 15 12 0 ...
$ Outdoor_Activities_Hours : num 4 14 15 5 0 5 0 50 7 0 ...
$ Video_Games_Hours : num 0 0 3 0 6 0 25 0 0 2 ...
$ Social_Websites_Hours : num 1 21 3 7 10 0 7 7 0 3 ...
$ Texting_Messaging_Hours : num 1 5 2 7 56 10 24 10 0 2 ...
$ Computer_Use_Hours : num 1 25 2 5 6 1 24 5 0 12 ...
$ Watching_TV_Hours : num 2 18 5 3 1 1 28 0 0 2 ...
$ Paid_Work_Hours : num 5 2 0 0 0 0 0 0 0 3 ...
$ Work_At_Home_Hours : num 2 2 10 3 10 6 24 10 7 4 ...
$ Schoolwork_Pressure : chr "Some" "A lot" "Some" "Very little" ...
$ Planned_Education_Level : chr "Some college" "Undergraduate degree" "Some college" "Some college" ...
$ Favorite_Music : chr "Punk rock" "Pop" "Rap/Hip hop" "Rap/Hip hop" ...
$ Superpower : chr "Invisibility" "Fly" "Invisibility" "Telepathy" ...
$ Preferred_Status : chr "Healthy" "Happy" "Famous" "Healthy" ...
$ Role_Model_Type : chr "Friend" "Relative" "Sports person" "Actor" ...
$ Charity_Donation : chr "Health" "Health" "Health" "International aid" ...
The data come from the United States Census at School questionnaire. The dataset is
a sample of 10,113 high school students at high schools in the United States. The students
completed this questionnaire as part of their statistics course. A researcher named Anthony
Gonzalez came across this data. Because he grew up spending a lot of time with his
family, he wonders whether students in the questionnaire who prefer to be happy also spent more
time with their families. He claims that if a student chose Happy as their preferred status, then
they were more likely to spend more time with their family. In other words, how much time a student
spends doing things with their family may be related to whether their preferred status is happy,
rich, famous, or healthy. In this research I am using the explanatory variable Preferred_Status
to explain the outcome variable, Doing_Things_With_Family_Hours. This research question
is important because we are trying to figure out whether, according to my hypothesis, someone who
spends more time with their family is more likely to be happy.
Doing_Things_With_Family_Hours = Preferred_Status + Other Stuff
Execution error
ERROR: Error in parse(text = x, srcfile = src): <text>:1:4: unexpected symbol
1: In the
^
# Keep only rows reporting at most 16 * 7 = 112 hours with family
# (presumably 16 waking hours per day over a 7-day week -- confirm the cap).
# filter() keeps rows where the condition is TRUE, so rows with a missing
# hours value are dropped as well.
CensusSchool <- filter(CensusSchool, Doing_Things_With_Family_Hours <= 16 * 7)

# Drop rows with a missing Preferred_Status. is.na() tests for missing values
# directly; comparing against the string "NA" only worked because filter()
# discards rows whose condition evaluates to NA.
CensusSchool <- filter(CensusSchool, !is.na(Preferred_Status))

# Histogram of family hours, one panel (row) per preferred status
gf_histogram(~Doing_Things_With_Family_Hours, data = CensusSchool) %>%
  gf_facet_grid(Preferred_Status ~ .)

# Group model: family hours explained by preferred status
Hours.Model <- lm(Doing_Things_With_Family_Hours ~ Preferred_Status, data = CensusSchool)
Hours.Model
# Apply the row filters so this cell can be run on its own; they are no-ops
# when the data has already been filtered the same way.
CensusSchool <- filter(CensusSchool, Doing_Things_With_Family_Hours <= 16 * 7)
# is.na() is the direct test for a missing Preferred_Status
CensusSchool <- filter(CensusSchool, !is.na(Preferred_Status))

# Empty model: no explanatory variable on the right-hand side
Empty_model <- lm(Doing_Things_With_Family_Hours ~ NULL, data = CensusSchool)

# Store the empty model's prediction for every row as a new column
CensusSchool$Empty.Pred <- predict(Empty_model)
head(CensusSchool)
# Jitter plot of family hours by preferred status, overlaid with the
# predictions of Hours.Model for each row.
# predict() must be called on the fitted model, not on a formula: the
# predictions form the left-hand side of the layer's formula, while color and
# height are arguments to gf_jitter(). height = 0 turns off vertical jitter so
# the predicted values are drawn at their exact heights.
gf_jitter(Doing_Things_With_Family_Hours ~ Preferred_Status, data = CensusSchool) %>%
  gf_jitter(predict(Hours.Model) ~ Preferred_Status, color = "dodgerblue", height = 0)
Execution error
ERROR: Error in UseMethod("predict"): no applicable method for 'predict' applied to an object of class "formula"
# Summarize the fitted Hours.Model with supernova() from the supernova package
# (presumably an ANOVA-style table of the model's variation -- confirm against
# the package documentation).
supernova(Hours.Model)