Dai_Ikegami_Class3

library(dplyr) # for manipulating data (group_by, filter, select, summarise, etc.) library(ggplot2) # for visualizing data using the ggplot command # note that dplyr and ggplot2 are part of a larger ecosystem of packages called `tidyverse` # in the future we can simply run `library(tidyverse)` to load all dplyr and ggplot2 library(jtools) # summ() commands to display regression output library(stargazer) # stargazer() command for displaying tables and regression output library(GGally) # ggpairs() command # convert scientific notation to numerals throughout the notebook options(scipen=999) # change plot size to 6in x 5in to fit the Deepnote window options(repr.plot.width=6, repr.plot.height=5)

df_store <- read.csv("Data_Store24A_Class3.csv")

str(df_store)

summary(df_store)

round(cor(df_store),2)

model1 <- lm(profit ~ ctenure, data = df_store) summ(model1) summ(model1, confint=TRUE)

model1 <- lm(profit ~ mtenure, data = df_store) summ(model1) model2 <- lm(profit ~ ctenure, data = df_store) summ(model2) model3 <- lm(profit ~ mtenure + ctenure, data = df_store) summ(model3)

model_1 <- lm(profit ~ mtenure + ctenure + pop + comp + visibility + pedcount + res + hours24, data = df_store) summ(model_1)

fit_profit3 <- lm(profit ~ mtenure + ctenure, data = df_store)

fit_profit4 <- lm(profit ~ pop + comp + visibility + pedcount + res + hours24, data = df_store)

fit_profit5 <- lm(profit ~ mtenure + ctenure + pop + comp + visibility + pedcount + res + hours24, data = df_store)

stargazer(fit_profit3, fit_profit4, fit_profit5, type = "text", digits = 2, report = ('vcstp*'))

anova(fit_profit4,fit_profit5)

ggplot(df_store, aes(x = mtenure, y = profit / 1000)) + geom_point(col = "blue") + xlab("Manager Tenure (in months)") + ylab("Store Profit (in thousands)")

# add a regression line to the plot using `geom_smooth` with the `lm` method # this helps to visualize how well a straight line fits the observations ggplot(df_store, aes(x = mtenure, y = profit / 1000)) + geom_point(col = "blue") + xlab("Manager Tenure (in months)") + ylab("Store Profit (in thousands)") + geom_smooth(method = "lm", formula = y~x, se = FALSE, col = "purple3")

fit_mt_low <- lm(profit ~ mtenure + ctenure + pop + comp + visibility + pedcount + res + hours24, data = subset(df_store, mtenure < 45)) summ(fit_mt_low) fit_mt_high <- lm(profit ~ mtenure + ctenure + pop + comp + visibility + pedcount + res + hours24, data = subset(df_store, mtenure >= 45)) summ(fit_mt_high)

# you can manually create a squared variable df_store <- df_store %>% # this line updates the dataframe with the new variable mutate(MT2 = mtenure^2) # the mutate function appends the new variable "MT2" to the dataframe fit_nonlinear <- lm(profit ~ mtenure + MT2 + ctenure + pop + comp + visibility + pedcount + res + hours24, data = df_store) summ(fit_nonlinear) #or you can use an R short-cut within the lm command to do the same thing: fit_profit6 <- lm(profit ~ mtenure + I(mtenure^2) + ctenure + pop + comp + visibility + pedcount + res + hours24, data = df_store) summ(fit_profit6)

ggplot(df_store, aes(x = mtenure, y = profit / 1000)) + geom_point(col = "blue") + geom_smooth(method = "lm", formula = y~x, se = FALSE, col = "purple3") + geom_smooth(method = "lm", formula = y~x + I(x^2), se = FALSE, col = "green3") + scale_x_continuous("Manager Tenure in Months, by Store",breaks=c(0,50,100,150,200)) + xlab("Manager Tenure (in months)") + ylab("Store Profit (in thousands)") + theme_classic()

df_storeB <- read.csv("Data_Store24B_Class3.csv") names(df_storeB)

summary(df_storeB)

# this uses the GGally package, already loaded above ggpairs(df_storeB, columns= c("profit", "mtenure", "mgrskill", "servqual"))

model_7 <- lm(profit ~ mtenure + I(mtenure^2) + ctenure + pop + comp + visibility + pedcount + res + hours24 + mgrskill, data = df_storeB) summ(model_7)

model_8 <- lm(servqual ~ mtenure + I(mtenure^2) + ctenure + pop + comp + visibility + pedcount + res + hours24 + mgrskill, data = df_storeB) summ(model_8)

model_9 <- lm(profit ~ mtenure + I(mtenure^2) + ctenure + pop + comp + visibility + pedcount + res + hours24 + servqual + mgrskill, data = df_storeB) summ(model_9)

model_1 <- lm(profit ~ mtenure + ctenure + pop + comp + visibility + pedcount + res + hours24, data = df_store) summary(model_1)

stargazer(df_store[c("mtenure", "ctenure")], type = "text", digits = 2)

#df_store$mtenure.scaled <- (df_store$mtenure - mean(df_store$mtenure)) / sd(df_store$mtenure) df_store <- df_store %>% mutate(mtenure.scaled = (mtenure - mean(mtenure)) / sd(mtenure))

df_store$mtenure.scaled1 <- scale(df_store$mtenure) df_store$ctenure.scaled <- scale(df_store$ctenure)

stargazer(df_store[c("mtenure", "mtenure.scaled", "mtenure.scaled1")], type = "text", digits = 2)

model_1_scaled <- lm(profit ~ mtenure.scaled + ctenure.scaled + pop + comp + visibility + pedcount + res + hours24, data = df_store) summary(model_1_scaled)

stargazer(model_1, model_1_scaled, type = "text", digits = 2, report = ('vcstp*'))

# BONUS code # You can also scale individual variables directly within the linear model command model_6 <- lm(profit ~ scale(mtenure) + scale(ctenure) + pop + comp + visibility + pedcount + res + hours24, data = df_store) summ(model_6)

library(sjPlot) plot_model(model_6)

# you can compare the graph above to a regression table # display confidence intervals with `ci=TRUE` # the confidence intervals below should correspond to those in the graph above stargazer(model_6, type = "text", digits = 2, report = ('vcsp*'), ci=TRUE)