# Attach the packages this notebook relies on, silencing messages and
# warnings emitted while they load
suppressMessages(
  suppressWarnings(
    suppressPackageStartupMessages({
      library(mosaic)
      library(supernova)
      library(Lock5withR)
    })
  )
)

# Shrink the default plot dimensions used by the notebook
options(
  repr.plot.width = 6,
  repr.plot.height = 4
)

# Read the CensusSchool data from its published CSV export
CensusSchool <- read.csv(
  "https://docs.google.com/spreadsheets/d/e/2PACX-1vSVaWnM4odSxy0mlnhWvvGbeLtiKoZmsbqC6KLzXtBOjQfrF9EVKuX4RVh3XbP3iw/pub?gid=2100178416&single=true&output=csv",
  header = TRUE
)

# List every column with its type and first few values
str(CensusSchool)
'data.frame': 10113 obs. of 60 variables:
$ Country : chr "USA" "USA" "USA" "USA" ...
$ Region : chr "SD" "PA" "CO" "MN" ...
$ DataYear : int 2011 2020 2015 2018 2014 2011 2011 2011 2021 2011 ...
$ ClassGrade : int 9 9 9 9 9 9 9 9 9 9 ...
$ Gender : chr "Female" "Female" "Male" "Female" ...
$ Ageyears : num 16 14 15 14 16 14 15 14 14 14 ...
$ Handed : chr "Right-Handed" "Left-Handed" "Right-Handed" "Right-Handed" ...
$ Height_cm : num 61 170 68 16256 385 ...
$ Footlength_cm : num 19 30 24 8 22 800 22 25 25 23 ...
$ Armspan_cm : num 157 67 164 NA 147 168 150 166 172 148 ...
$ Languages_spoken : num 0 1 1 1 1 1 1 1 1 2 ...
$ Travel_to_School : chr "Bus" "Bus" "Car" "Bus" ...
$ Travel_time_to_School : num 45 45 NA 20 12 20 15 10 6 20 ...
$ Reaction_time : num 0.422 0.31 0.34 0.491 0.433 0.297 0.375 61 0.721 0.469 ...
$ Score_in_memory_game : num 44 47 41 46 48 35 37 60 44 41 ...
$ Favourite_physical_activity : chr "Swimming" "Basketball" "Baseball/Softball" "Dancing" ...
$ Importance_reducing_pollution : num 5 1000 1000 1000 900 350 25 0 5 NA ...
$ Importance_recycling_rubbish : num 400 900 1000 1000 0 350 15 0 21 1050 ...
$ Importance_conserving_water : num 600 900 1000 0 800 250 50 0 102 1000 ...
$ Importance_saving_energy : num NA 900 500 1000 1000 NA NA NA 1 NA ...
$ Importance_owning_computer : num 0 0 0 0 1000 0 0 0 5 NA ...
$ Importance_Internet_access : num 0 0 0 0 0 0 0 0 0 0 ...
$ Left_Footlength_cm : num 18 30 24 9 21 800 21 26 24 23 ...
$ Longer_foot : chr "Right foot" "Same length" "Same length" "Same length" ...
$ Index_Fingerlength_mm : num 10 8 90 3 60 3000 6 8 7 8 ...
$ Ring_Fingerlength_mm : num 6 7 95 5 60 3500 6 10 6.5 7 ...
$ Longer_Finger_Lefthand : chr "Index finger" "Index finger" "Index finger" "Ring finger" ...
$ Birth_month : chr "October" "June" "August" "April" ...
$ Favorite_Season : chr "Spring" "Summer" "Summer" "Fall" ...
$ Allergies : chr "Yes" "Yes" "No" "Yes" ...
$ Vegetarian : chr "No" "No" "No" "No" ...
$ Favorite_Food : chr "Pizza/Pasta" "Desserts" "Meat" "No favorite" ...
$ Beverage : chr "Water" "Water" "Water" "Water" ...
$ Favorite_School_Subject : chr "History" "Other" "Mathematics and statistics" "History" ...
$ Sleep_Hours_Schoolnight : num 8 8 8 8 9 7 8 9 8 5 ...
$ Sleep_Hours_Non_Schoolnight : num 8 9 8 7 8 9 11 7 NA 13 ...
$ Home_Occupants : num 5 4 5 6 4 5 2 4 8 4 ...
$ Home_Internet_Access : chr "No internet connection" "Yes - broadband connection" "Yes - other" "Yes - other" ...
$ Communication_With_Friends : chr "In person" "Text messaging" "Text messaging" "Internet chat or instant messaging" ...
$ Text_Messages_Sent_Yesterday : num 0 10 3 8 200 500 12 250 1 249 ...
$ Text_Messages_Received_Yesterday: num 0 10 4 3 500 500 10 300 0 305 ...
$ Hanging_Out_With_Friends_Hours : num 2 NA 3 5 0 10 14 20 12 3 ...
$ Talking_On_Phone_Hours : num 0 1 2 7 10 0 0 15 3 1 ...
$ Doing_Homework_Hours : num 3 10 3 3 100 2 28 10 6 1 ...
$ Doing_Things_With_Family_Hours : num 1 4 15 7 10 9 28 15 12 0 ...
$ Outdoor_Activities_Hours : num 4 14 15 5 0 5 0 50 7 0 ...
$ Video_Games_Hours : num 0 0 3 0 6 0 25 0 0 2 ...
$ Social_Websites_Hours : num 1 21 3 7 10 0 7 7 0 3 ...
$ Texting_Messaging_Hours : num 1 5 2 7 56 10 24 10 0 2 ...
$ Computer_Use_Hours : num 1 25 2 5 6 1 24 5 0 12 ...
$ Watching_TV_Hours : num 2 18 5 3 1 1 28 0 0 2 ...
$ Paid_Work_Hours : num 5 2 0 0 0 0 0 0 0 3 ...
$ Work_At_Home_Hours : num 2 2 10 3 10 6 24 10 7 4 ...
$ Schoolwork_Pressure : chr "Some" "A lot" "Some" "Very little" ...
$ Planned_Education_Level : chr "Some college" "Undergraduate degree" "Some college" "Some college" ...
$ Favorite_Music : chr "Punk rock" "Pop" "Rap/Hip hop" "Rap/Hip hop" ...
$ Superpower : chr "Invisibility" "Fly" "Invisibility" "Telepathy" ...
$ Preferred_Status : chr "Healthy" "Happy" "Famous" "Healthy" ...
$ Role_Model_Type : chr "Friend" "Relative" "Sports person" "Actor" ...
$ Charity_Donation : chr "Health" "Health" "Health" "International aid" ...
# First look at the raw, unfiltered data: paid work hours for each gender
gf_point(
  Paid_Work_Hours ~ Gender,
  data = CensusSchool
)
Warning message:
“Removed 59 rows containing missing values (geom_point).”
# Remove rows reporting 7000 paid-work hours and rows with no recorded gender.
# read.csv() turns "NA" fields into real missing values, so missingness is
# tested with is.na() rather than by comparing against the string "NA".
CensusSchool4 <- filter(
  CensusSchool,
  Paid_Work_Hours != 7000,
  !is.na(Gender)
)
gf_jitter(Paid_Work_Hours ~ Gender, data = CensusSchool4)

# Keep only values up to 168 (presumably the number of hours in a week --
# confirm the survey's time frame). This first version still contains rows
# whose Gender is missing.
CensusSchool9 <- filter(CensusSchool, Paid_Work_Hours <= 168)
gf_point(Paid_Work_Hours ~ Gender, data = CensusSchool9)

# Same cutoff, now also dropping rows whose Gender is missing
CensusSchool9 <- filter(CensusSchool9, !is.na(Gender))
gf_jitter(Paid_Work_Hours ~ Gender, data = CensusSchool9) %>%
  gf_boxplot(alpha = .1, color = "red", size = .5)

# Tighter cutoff: fewer than 40 paid-work hours, missing Gender dropped
CensusSchool10 <- filter(
  CensusSchool,
  Paid_Work_Hours < 40,
  !is.na(Gender)
)
gf_jitter(Paid_Work_Hours ~ Gender, data = CensusSchool10) %>%
  gf_boxplot(alpha = .4, color = "dodgerblue2")
# Fit a linear model of paid work hours with Gender as the only predictor,
# then print the fitted coefficients
censusschool.model <- lm(Paid_Work_Hours ~ Gender, data = CensusSchool10)
censusschool.model

# Fitted value for each row that went into the model
Genderpredictions <- predict(censusschool.model)
Genderpredictions

# Overlay the model's predictions (red) on the jittered data.
# height = 0 switches off vertical jitter for the prediction layer: every
# prediction within a Gender group is the same number, and jittering it
# vertically would draw points at values the model never predicts. The
# default horizontal jitter is kept so the points line up with the data.
gf_jitter(Paid_Work_Hours ~ Gender, data = CensusSchool10) %>%
  gf_jitter(
    Genderpredictions ~ Gender,
    color = "red",
    size = 0.3,
    height = 0
  )

# ANOVA-style summary table for the fitted model
supernova(censusschool.model)