# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import seaborn as sns
import matplotlib.pyplot as plt
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory
import os
# List every file stored alongside the dataset.
# os.walk() only descends into directories: handed the CSV path itself it
# silently yields nothing, so the walk starts from the directory that
# contains the file instead.
for dirname, _, filenames in os.walk(os.path.dirname('/work/Europe Hotel Booking Satisfaction Score.csv')):
    for filename in filenames:
        print(os.path.join(dirname, filename))
# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All"
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session
Kelompok 3
Kelvien Kurniawan & Niki Baskoro Indrakesuma
Data Background
This dataset consists of customer satisfaction variables for European hotels and was compiled by Ishan Singh.
Dataframe
Outline
Preprocessing
# Read the survey export; df_orig stays untouched as the raw reference copy.
df_orig = pd.read_csv('/work/Europe Hotel Booking Satisfaction Score.csv')
# Work on a copy restricted to the columns at positions 6-15, then discard
# every row that has a missing value in that subset.
df = df_orig.copy().iloc[:, 6:16].dropna()
df
Importing Factor Analyzer
pip install factor_analyzer # installing Factor Analyzer
Collecting factor_analyzer
Downloading factor_analyzer-0.3.2.tar.gz (40 kB)
|████████████████████████████████| 40 kB 9.3 MB/s
Requirement already satisfied: pandas in /shared-libs/python3.7/py/lib/python3.7/site-packages (from factor_analyzer) (1.2.4)
Requirement already satisfied: scipy in /shared-libs/python3.7/py/lib/python3.7/site-packages (from factor_analyzer) (1.6.3)
Requirement already satisfied: numpy in /shared-libs/python3.7/py/lib/python3.7/site-packages (from factor_analyzer) (1.19.5)
Requirement already satisfied: scikit-learn in /shared-libs/python3.7/py/lib/python3.7/site-packages (from factor_analyzer) (0.24.2)
Requirement already satisfied: pytz>=2017.3 in /shared-libs/python3.7/py/lib/python3.7/site-packages (from pandas->factor_analyzer) (2021.1)
Requirement already satisfied: python-dateutil>=2.7.3 in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from pandas->factor_analyzer) (2.8.1)
Requirement already satisfied: six>=1.5 in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from python-dateutil>=2.7.3->pandas->factor_analyzer) (1.16.0)
Requirement already satisfied: joblib>=0.11 in /shared-libs/python3.7/py/lib/python3.7/site-packages (from scikit-learn->factor_analyzer) (1.0.1)
Requirement already satisfied: threadpoolctl>=2.0.0 in /shared-libs/python3.7/py/lib/python3.7/site-packages (from scikit-learn->factor_analyzer) (2.1.0)
Building wheels for collected packages: factor-analyzer
Building wheel for factor-analyzer (setup.py) ... done
Created wheel for factor-analyzer: filename=factor_analyzer-0.3.2-py3-none-any.whl size=40395 sha256=d0f6a0ce08a25644a138c2686fe2ee8d1089b3357f9e8d58e0dea4a68431fe15
Stored in directory: /root/.cache/pip/wheels/8d/9e/4c/fd4cb92cecf157b13702cc0907e5c56ddc48e5388134dc9f1a
Successfully built factor-analyzer
Installing collected packages: factor-analyzer
Successfully installed factor-analyzer-0.3.2
WARNING: Running pip as root will break packages and permissions. You should install packages reliably by using venv: https://pip.pypa.io/warnings/venv
Note: you may need to restart the kernel to use updated packages.
from factor_analyzer import FactorAnalyzer # Then, we import the installed package into our notebook.
Testing a few Assumptions
# Bartlett's test of sphericity on the selected columns.
# The result is not assigned, so it is only visible as the cell's displayed output.
# NOTE(review): presumably a (chi-square, p-value) pair — confirm against the factor_analyzer docs.
from factor_analyzer.factor_analyzer import calculate_bartlett_sphericity
calculate_bartlett_sphericity(df)
# Kaiser-Meyer-Olkin (KMO) measure on the same columns.
from factor_analyzer.factor_analyzer import calculate_kmo
# calculate_kmo returns two values; only the second one (kmo_model) is printed.
# NOTE(review): presumably kmo_all holds per-variable scores and kmo_model the overall score — confirm against the factor_analyzer docs.
kmo_all, kmo_model = calculate_kmo(df)
print(kmo_model)
0.7174511027484614
Identify the Number of Factors
# Instantiate the factor analyzer with its default settings
fa = FactorAnalyzer()
# Fit it on the preprocessed dataframe
fa.fit(df)
# Retrieve the eigenvalues; get_eigenvalues() returns two arrays.
# NOTE(review): presumably ev = eigenvalues of the original correlation matrix and v = common-factor eigenvalues — confirm against the factor_analyzer docs.
ev, v = fa.get_eigenvalues() # v is not used in the visible code
# Display the eigenvalues as the cell output
ev
# Refit the factor analyzer with five factors and a varimax rotation, then
# print the resulting loading of every variable on each factor.
fa = FactorAnalyzer(n_factors=5, rotation='varimax')
fa.fit(df)
print(fa.loadings_)
[[ 1.26462633e-01 8.38894918e-01 2.50537057e-01 7.95662921e-02
1.05534846e-02]
[-1.47036597e-02 2.30507151e-01 5.92309151e-01 3.25777876e-02
1.23698785e-01]
[ 8.75130674e-04 7.16694702e-01 4.59687353e-01 -8.28888943e-03
-9.37391602e-04]
[ 5.82353877e-04 1.95734915e-01 6.92182874e-01 -7.09490226e-03
-8.19639711e-02]
[ 7.61114423e-01 4.88521505e-02 -6.45854131e-03 -9.41492251e-03
-1.70730930e-04]
[ 7.61587221e-01 3.06077072e-02 7.14286268e-03 -4.85711295e-03
1.63959131e-01]
[ 8.14204090e-01 8.93243514e-02 -2.87288384e-02 4.90233444e-01
-7.89494712e-02]
[ 1.17331748e-01 5.21384399e-03 2.14729777e-02 1.47468863e-01
6.08247583e-01]
[ 5.45823587e-02 4.23506339e-02 2.35036460e-02 7.60265265e-01
1.95255862e-01]
[ 8.58421466e-01 2.20351820e-02 4.95910468e-03 1.46380872e-02
1.31337152e-01]]
# Wrap the loadings in a labelled table: one row per analysed column of df,
# one column per extracted factor ('Factor 1' .. 'Factor 5').
factor_labels = [f'Factor {i}' for i in range(1, 6)]
lmatrix = pd.DataFrame(fa.loadings_, index=list(df.columns), columns=factor_labels)
lmatrix  # loading matrix
# Rank the variables by their loading on each factor in turn, highest first.
lmatrix.sort_values('Factor 1', ascending=False)
lmatrix.sort_values('Factor 2', ascending=False)
lmatrix.sort_values('Factor 3', ascending=False)
lmatrix.sort_values('Factor 4', ascending=False)
lmatrix.sort_values('Factor 5', ascending=False)