Reading the data
import pandas as pd
import plotly.express as px
Run to view results
DB Clients
MixPanel
Select uuid,client_type as "db_client_type",client_type_bc, source_country from ANALYTICS.PROD_MODELED.CLIENTS
Run to view results
# ler CSV com dados de checkut https://mixpanel.com/project/2487192/view/3029348/app/boards#id=6920713&edited-bookmark=AdTNpQ3pTqSP
df_checkout = pd.read_csv('/work/20240327_Checkoutstartedperclienttype.csv', usecols=['userId', 'client_type'])
# remover usuarios undefinied
df_checkout = df_checkout[df_checkout['userId'] != 'undefined']
# apagar os 3 primeiros digitos
df_checkout['userId'] = df_checkout['userId'].apply(lambda x: x[3:])
# manter apenas ids unicos
df_checkout = df_checkout.drop_duplicates(subset=['userId'])
Run to view results
Mixing the dbs
merged_df = pd.merge(df_checkout, dfClients, left_on='userId', right_on='uuid', how='left')
merged_df['client_type'] = merged_df['client_type'].str.lower()
merged_df['db_client_type'] = merged_df['db_client_type'].str.lower()
merged_df['client_type_bc'] = merged_df['client_type_bc'].str.lower()
merged_df
Run to view results
Showing the data
dfCTCTBC = merged_df.copy()
dfCTCTBC['match'] = dfCTCTBC['client_type'] == dfCTCTBC['client_type_bc']
# Passo 3: Calcular a porcentagem de match para cada tipo de cliente
match_rate = dfCTCTBC.groupby('client_type')['match'].mean() * 100
# Transformando match_rate em DataFrame para facilitar a plotagem
match_rate_df = match_rate.reset_index().rename(columns={'match': 'match_rate'})
# Passo 4: Criar o gráfico com Plotly
fig = px.bar(match_rate_df, x='client_type', y='match_rate', title='% of match on mixpanel (client_type) and Clients table (client_type_bc) ', labels={'match_rate': 'Porcentagem de Match on client table (%)', 'client_type': 'Tipo de Cliente on MixPanel'})
fig.update_yaxes(range=[0, 100]) # Garante que a escala do eixo y vá de 0 a 100%
fig.show()
Run to view results
dfCTCT = merged_df.copy()
dfCTCT['match'] = dfCTCT['client_type'] == dfCTCT['db_client_type']
# Passo 3: Calcular a porcentagem de match para cada tipo de cliente
match_rate = dfCTCT.groupby('client_type')['match'].mean() * 100
# Transformando match_rate em DataFrame para facilitar a plotagem
match_rate_df = match_rate.reset_index().rename(columns={'match': 'match_rate'})
# Passo 4: Criar o gráfico com Plotly
fig = px.bar(match_rate_df, x='client_type', y='match_rate', title='% of match on mixpanel (client_type) and Clients table (client_type) ', labels={'match_rate': 'Porcentagem de Match on client table (%)', 'client_type': 'Tipo de Cliente on MixPanel'})
fig.update_yaxes(range=[0, 100]) # Garante que a escala do eixo y vá de 0 a 100%
fig.show()
Run to view results