select * from analytics.production.REPORT_STATIC_INSTANT_VALUATION_EXPERIMENT_DETAILS
import pandas as pd
import numpy as np
df = snowflake_exp_details
df.describe()
df_population_size = df.groupby(['experiment_group'])['buyer_deals_lead_property_id'].nunique().reset_index(name="number_of_approved_valuation_requests")
df_rates = df[(df['is_offer_prepared'] == True)].groupby('experiment_group')['buyer_deals_lead_property_id'].count() / df.groupby('experiment_group')['buyer_deals_lead_property_id'].nunique()
df_rates = df_rates.reset_index(name='rate_offer_prepared')
df_results = df_population_size.merge(df_rates, how='left', on='experiment_group', suffixes=('_x', '_y'))
df_results.head()
def _pooled_proportion(p1, p2, n1, n2):
return (p1*n1 + p2*n2)/(n1+n2)
def z_score(p1, p2, n1, n2, pooled_proportion):
try:
score = (p1-p2) / np.sqrt(pooled_proportion*(1-pooled_proportion)*(1/n1+1/n2))
except ZeroDivisionError:
score = 0
return score
## lol why did you do it like this
p1 = df_results[df_results['experiment_group'] == 'Control']['rate_offer_prepared'][0]
p2 = df_results[df_results['experiment_group'] == 'Variant']['rate_offer_prepared'][1]
n1 = df_results[df_results['experiment_group'] == 'Control']['number_of_approved_valuation_requests'][0].round(3)
n2 = df_results[df_results['experiment_group'] == 'Variant']['number_of_approved_valuation_requests'][1].round(3)
pooled_proportion = _pooled_proportion(p1, p2, n1, n2)
z_score_result = z_score(p1, p2, n1, n2, pooled_proportion)
z_score_result
from scipy.stats import norm
z_score_p_value = (norm(0, 1).cdf(-np.absolute(z_score_result)) * 2).round(3)
z_score_p_value
# testing if you can visualize yourself
import seaborn as sns
sns.barplot(data=df_results, x='experiment_group', y='rate_offer_prepared')