Data Cleanup
import pandas as pd
# Load datasets
marriott = pd.read_csv('Marriott.csv')
hyatt = pd.read_csv('Hyatt.csv')
hyatt_classifications = pd.read_csv('Hyatt Classifications.csv') # Hyatt Brand Classification Mapping
marriott_classifications = pd.read_csv('Marriott Classifications.csv') # Marriott Property Classification Mapping
# Standardize column names (remove extra spaces)
marriott.columns = marriott.columns.str.strip()
hyatt.columns = hyatt.columns.str.strip()
hyatt_classifications.columns = hyatt_classifications.columns.str.strip()
marriott_classifications.columns = marriott_classifications.columns.str.strip()
# Rename columns for consistency
marriott.rename(columns={"Post-Tax Cash/Night": "Average Cash/Night"}, inplace=True)
marriott.rename(columns={"Average of Points/Night": "Average Points/Night"}, inplace=True)
hyatt.rename(columns={"Average Cents-per-Point": "CPP"}, inplace=True)
# Standardize Region names for Hyatt
country_region_mapping = {
"Brazil": "South America", "Chile": "South America", "Colombia": "South America",
"Argentina": "South America", "Peru": "South America", "Uruguay": "South America",
"Panama": "Central America", "Honduras": "Central America", "Nicaragua": "Central America",
"El Salvador": "Central America", "Guatemala": "Central America",
}
hyatt["Region"] = hyatt["Country"].map(country_region_mapping).where(hyatt["Country"].isin(country_region_mapping), hyatt["Region"])
# Standardize region naming conventions
region_mapping = {
"Australia & Pacific": "Australia & Oceania",
"Greater China": "Asia",
"United States & Canada": "North America",
"Latin America": "South America",
"Caribbean": "Central America"
}
hyatt["Region"] = hyatt["Region"].replace(region_mapping)
# Assign hotel chain labels
marriott["Hotel Chain"] = "Marriott"
hyatt["Hotel Chain"] = "Hyatt"
# Deduplicate classification mapping tables to avoid row expansion during merging
hyatt_classifications = hyatt_classifications.drop_duplicates(subset=["Brand"])
marriott_classifications = marriott_classifications.drop_duplicates(subset=["Property"])
# Merge Marriott and Hyatt datasets
hotels_df = pd.concat([marriott, hyatt], ignore_index=True, sort=False)
# Merge Hyatt classifications into hotels_df based on "Brand"
hotels_df = hotels_df.merge(hyatt_classifications[['Brand', 'Classification']],
on='Brand', how='left', suffixes=('', '_hyatt'))
# Merge Marriott classifications into hotels_df based on "Property"
hotels_df = hotels_df.merge(marriott_classifications[['Property', 'Classification']],
on='Property', how='left', suffixes=('', '_marriott'))
# Use Marriott classification if available, otherwise keep existing classification
hotels_df['Classification'] = hotels_df['Classification_marriott'].combine_first(hotels_df['Classification'])
# Drop temporary classification columns used for merging
hotels_df.drop(columns=['Classification_marriott', 'Classification_hyatt'], inplace=True, errors='ignore')
# Count the classifications for Marriott
marriott_counts = hotels_df[hotels_df["Hotel Chain"] == "Marriott"]["Classification"].value_counts()
# Convert to DataFrame for display
marriott_counts_df = marriott_counts.reset_index()
marriott_counts_df.columns = ["Classification", "Count"]
# Calculate CPP for Marriott
hotels_df.loc[hotels_df["Hotel Chain"] == "Marriott", "CPP"] = (
(hotels_df["Average Cash/Night"] - (hotels_df["Points Earned From Cash"] * 0.008)) /
hotels_df["Average Points/Night"]*100
)
print(hotels_df.shape) # Check if extra rows are removed
display(hotels_df.head()) # Preview the first few
Summary Statistics for Marriott Properties
Ranking Regions & Classifications with Highest CPP Average
# Calculate the average CPP grouped by Region for Marriott properties
marriott_avg_by_region = marriott_only_df.groupby(["Region", "Classification"])["CPP"].mean().reset_index().round(4)
# Filter countries with CPP above the average (0.70) and sort by highest CPP
marriott_cpp_regions = marriott_avg_by_region.sort_values(by="CPP", ascending=False).round(2)
# Display the results
marriott_cpp_regions
# Calculate the average CPP grouped by Region for Marriott properties
marriott_avg_by_region = marriott_only_df.groupby(["Region"])["CPP"].mean().reset_index().round(4)
# Filter countries with CPP above the average (0.70) and sort by highest CPP
marriott_cpp_regions = marriott_avg_by_region.sort_values(by="CPP", ascending=False).round(2)
# Display the results
marriott_cpp_regions
Luxury vs. Upscale Analysis
# Calculate the average CPP grouped by Region for Marriott properties
marriott_avg_by_class = marriott_only_df.groupby(["Classification"])["CPP"].median().reset_index().round(4)
# Filter countries and sort by highest CPP
marriott_cpp_class = marriott_avg_by_class.sort_values(by="CPP", ascending=False)
# Display the results
display(marriott_cpp_class)
# Group by Classification and count total properties
classification_counts = marriott_only_df.groupby("Classification")["CPP"].count()
# Count properties with an average CPP above 0.8 per classification
high_value_counts = marriott_only_df[marriott_only_df["CPP"] > 0.8].groupby("Classification")["CPP"].count()
# Calculate the percentage of properties above 0.8 CPP for each classification
percent_high_value_by_class = (high_value_counts / classification_counts) * 100
# Convert to a DataFrame for display
percent_high_value_by_class = percent_high_value_by_class.reset_index().rename(columns={"CPP": "Percent Above 0.8 CPP"})
# Round to 2 decimal places
percent_high_value_by_class["Percent Above 0.8 CPP"] = percent_high_value_by_class["Percent Above 0.8 CPP"].round(2)
display(percent_high_value_by_class)
Worst Redemptions
# Ensure CPP column is numeric for sorting
hotels_df["CPP"] = pd.to_numeric(hotels_df["CPP"], errors="coerce")
# Filter only Marriott properties
marriott_df = hotels_df[hotels_df["Hotel Chain"] == "Marriott"]
# Get bottom 10 Marriott properties by CPP
bottom_10_marriott_cpp = marriott_df.nsmallest(10, "CPP").round(2)
# Select only the necessary columns: Property name, Region, Country, and CPP
bottom_10_marriott_cpp_filtered = bottom_10_marriott_cpp[["Property", "Region", "Country", "CPP"]]
bottom_10_marriott_cpp_filtered
Best Redemptions
# Ensure CPP column is numeric for sorting
hotels_df["CPP"] = pd.to_numeric(hotels_df["CPP"], errors="coerce")
# Filter only Marriott properties
marriott_df = hotels_df[hotels_df["Hotel Chain"] == "Marriott"]
# Get top 10 Marriott properties by CPP
top_10_marriott_cpp = marriott_df.nlargest(10, "CPP").round(2)
# Select only the necessary columns: Property name and CPP
top_10_marriott_cpp_filtered = top_10_marriott_cpp[["Property", "Region", "Country", "CPP"]]
# Display the result
top_10_marriott_cpp_filtered
Countries with Highest CPP and Still Above Average (0.80)
# Fixing the syntax error caused by inconsistent quotation marks
# Calculate the average CPP grouped by Country for Marriott properties
marriott_avg_by_country = marriott_only_df.groupby(["Region","Country"])["CPP"].mean().reset_index().round(2)
# Filter countries with CPP above the average (0.80) and sort by highest CPP
marriott_cpp_countries_above_avg = marriott_avg_by_country[marriott_avg_by_country["CPP"] > 0.80].sort_values(by="CPP", ascending=False)
# Display the results
marriott_cpp_countries_above_avg
# Fixing the syntax error caused by inconsistent quotation marks
# Calculate the average CPP grouped by Country for Marriott properties
marriott_avg_by_country = marriott_only_df.groupby(["Region","Country"])["CPP"].mean().reset_index().round(2)
# Filter countries with CPP above the average (0.80) and sort by highest CPP
marriott_cpp_countries_above_avg = marriott_avg_by_country.sort_values(by="CPP", ascending=False)
# Display the results
marriott_cpp_countries_above_avg
# Fixing the syntax error caused by inconsistent quotation marks
# Calculate the average CPP grouped by Country for Marriott properties
marriott_avg_by_country = marriott_only_df.groupby(["Region", "Country"])["CPP"].mean().reset_index().round(2)
# Filter countries with CPP above the average (0.80) and sort by highest CPP
marriott_cpp_countries_above_avg = marriott_avg_by_country[marriott_avg_by_country["CPP"] > 0.80].sort_values(by="CPP", ascending=False)
# Count total number of Marriott properties per country
total_marriott_count = marriott_only_df.groupby("Country")["Property"].count().reset_index()
total_marriott_count.columns = ["Country", "Total Marriott Properties"]
# Filter properties where CPP is greater than 0.8
marriott_filtered = marriott_only_df[marriott_only_df["CPP"] > 0.8]
# Count Marriott properties over 0.8 CPP per country
marriott_over_08_count = marriott_filtered.groupby("Country")["Property"].count().reset_index()
marriott_over_08_count.columns = ["Country", "Marriott Properties Over 0.8 CPP"]
# Merge with total properties count
marriott_analysis = pd.merge(total_marriott_count, marriott_over_08_count, on="Country", how="left").fillna(0)
# Calculate the percentage
marriott_analysis["Percentage Over 0.8 CPP"] = (marriott_analysis["Marriott Properties Over 0.8 CPP"] / marriott_analysis["Total Marriott Properties"]) * 100
# Merge with countries above 0.80 CPP to get final dataset
final_marriott_analysis = pd.merge(marriott_cpp_countries_above_avg, marriott_analysis, on="Country", how="left").fillna(0)
# Sort by highest CPP
final_marriott_analysis = final_marriott_analysis.sort_values(by="CPP", ascending=False)
# Display results
final_marriott_analysis
Marriott CPP Distribution
import matplotlib.pyplot as plt
import seaborn as sns
# Calculate the mean CPP
mean_cpp = marriott_only_df["CPP"].mean()
median_cpp = marriott_only_df["CPP"].median()
# Create histogram with mean line
plt.figure(figsize=(10, 5))
sns.histplot(marriott_only_df["CPP"].dropna(), bins=30, kde=True, color="blue")
# Add mean CPP line
plt.axvline(mean_cpp, color='red', linestyle='dashed', linewidth=2, label=f'Mean CPP: {mean_cpp:.2f}')
# Add median CPP line
plt.axvline(median_cpp, color='orange', linestyle='dotted', linewidth=2, label=f'Median CPP: {median_cpp:.2f}')
# Add labels and title
plt.xlabel("Cents Per Point (CPP)", fontsize=12)
plt.ylabel("Frequency", fontsize=12)
plt.title("Distribution of Marriott CPP (Cents Per Point)", fontsize=12)
plt.legend()
# Show the plot
plt.show()