library(dplyr) # for manipulating data (group_by, filter, select, summarise, etc.)
library(ggplot2) # for visualizing data using the ggplot command
# note that dplyr and ggplot2 are part of a larger ecosystem of packages called `tidyverse`
# in the future we can simply run `library(tidyverse)` to load both dplyr and ggplot2
library(jtools) # summ() commands to display regression output
library(stargazer) # stargazer() command for displaying tables and regression output
library(sjPlot) # plot_model() command for visualizing output
# Notebook-wide display settings, applied in a single call:
#   scipen           - large penalty so numbers print as plain numerals
#                      rather than in scientific notation
#   repr.plot.width  - plot width (6in) chosen to fit the Deepnote window
#   repr.plot.height - plot height (5in) chosen to fit the Deepnote window
options(
  scipen = 999,
  repr.plot.width = 6,
  repr.plot.height = 5
)
Attaching package: ‘dplyr’
The following objects are masked from ‘package:stats’:
filter, lag
The following objects are masked from ‘package:base’:
intersect, setdiff, setequal, union
Please cite as:
Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
R package version 5.2.2. https://CRAN.R-project.org/package=stargazer
Registered S3 methods overwritten by 'broom':
method from
tidy.glht jtools
tidy.summary.glht jtools
Learn more about sjPlot with 'browseVignettes("sjPlot")'.
# Read the store data set from the CSV file (path is relative to the
# working directory), then inspect it: column names first, followed by a
# compact view of each column's type and leading values.
df_store <- read.csv(file = "Data_Class7_Store24_B.csv")
colnames(df_store)
str(df_store)
'data.frame': 75 obs. of 14 variables:
$ store : int 1 2 3 4 5 6 7 8 9 10 ...
$ sales : int 1060294 1619874 1099921 1053860 1227841 1703140 1809256 1378482 2113089 1080979 ...
$ profit : int 265014 424007 222735 210122 300480 469050 476355 361115 474725 278625 ...
$ mtenure : num 0 86.22 23.89 0 3.88 ...
$ ctenure : num 24.8 6.64 5.03 5.37 6.87 ...
$ pop : int 7535 8630 9695 2797 20335 16926 17754 20824 26519 16381 ...
$ comp : num 2.8 4.24 4.49 4.25 1.65 ...
$ visibility: int 3 4 3 4 2 3 2 4 2 4 ...
$ pedcount : int 3 3 3 2 5 4 5 3 4 3 ...
$ res : int 1 1 1 1 0 1 1 1 1 1 ...
$ hours24 : int 1 1 1 1 1 0 1 1 1 0 ...
$ crewskill : num 3.56 3.2 3.8 2.06 3.65 ...
$ mgrskill : num 3.15 3.56 4.12 4.1 3.59 ...
$ servqual : num 86.8 94.7 78.9 100 68.4 ...
# Preview the first six rows of the data, then print a plain-text table of
# summary statistics for the data frame, formatted to two decimal places.
head(df_store, n = 6L)
stargazer(df_store, digits = 2, type = "text")
===========================================================================
Statistic N Mean St. Dev. Min Pctl(25) Pctl(75) Max
---------------------------------------------------------------------------
store 75 38.00 21.79 1 19.5 56.5 75
sales 75 1,205,413.00 304,531.30 699,306 984,579 1,362,388 2,113,089
profit 75 276,313.60 89,404.08 122,180 211,003.5 331,313.5 518,998
mtenure 75 45.30 57.67 0 6.7 50.9 278
ctenure 75 13.93 17.70 0.89 4.39 17.22 114.15
pop 75 9,825.59 5,911.67 1,046 5,616.5 14,104 26,519
comp 75 3.79 1.31 1.65 3.15 4.23 11.13
visibility 75 3.08 0.75 2 3 4 5
pedcount 75 2.96 0.99 1 2 4 5
res 75 0.96 0.20 0 1 1 1
hours24 75 0.84 0.37 0 1 1 1
crewskill 75 3.46 0.41 2.06 3.22 3.66 4.64
mgrskill 75 3.64 0.41 2.96 3.34 3.92 4.62
servqual 75 87.15 12.61 57.90 78.95 99.90 100.00
---------------------------------------------------------------------------
# Insert your own analyses here. You can add more code blocks as needed.
names(df_store)

# Model 1: profit regressed on the two tenure variables.
model_1 <- lm(
  profit ~ mtenure + ctenure,
  data = df_store
)
summ(model_1)

# Model 2: what is the relationship between service quality and profit?
model_2 <- lm(
  profit ~ servqual,
  data = df_store
)
summ(model_2)

# Model 3: what is the relationship between tenure and service quality?
model_3 <- lm(
  servqual ~ mtenure + ctenure,
  data = df_store
)
summ(model_3)

# Model 4: what is the relationship between service quality and the other
# skill metrics?
model_4 <- lm(
  servqual ~ crewskill + mgrskill,
  data = df_store
)
summ(model_4)

# Model 5: what is the relationship between service quality and the other
# intrinsic store metrics?
model_5 <- lm(
  servqual ~ pop + comp + visibility + pedcount + res + hours24,
  data = df_store
)
summ(model_5)

# Model 6: last, just for fun, the "kitchen sink" regression.
# What is the relationship between profit and service quality together with
# all the variables used previously? The hope is that adding servqual to the
# data from part A of the case improves the regression's predictive power.
model_6 <- lm(
  profit ~ mtenure + ctenure + pop + comp + visibility + pedcount +
    res + hours24 + crewskill + mgrskill + servqual,
  data = df_store
)
summ(model_6)