!ls /datasets/datahub/ml-repo/multi-touch-attribution
import itertools
from collections import Counter, defaultdict

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import requests
from PIL import Image
from gekko import GEKKO
from pandas_profiling import ProfileReport
%matplotlib inline
# Importing the dataset (read_csv already returns a DataFrame, so no extra conversion is needed)
data = pd.read_csv("/datasets/datahub/ml-repo/multi-touch-attribution/attribution_data.csv")
data
# Creating an exploratory data analysis report using the pandas-profiling module
profile = ProfileReport(data, title='Attribution Data Report', explorative=True)
# Displaying the report
profile.to_widgets()
profile.to_file("Report.html")  # Saving the same report as an HTML file in the working directory
# Here's an image for a better understanding of the last-touch attribution model
im = Image.open(requests.get('https://adidodigital.s3.amazonaws.com/Blogs-2020/last%20touch%20attribution%20model.jpg', stream=True).raw)
im
def last_touch_model(dt, conv_col, channel_col):  # Defining the function for the last-touch model
    last_touch = dt.loc[dt[conv_col] == 1]  # Extracting the rows where conversion is 1
    # Calculating the weightage: the share of conversions whose last touch was each channel
    res_last_touch = pd.DataFrame(round(last_touch[channel_col].value_counts(normalize=True) * 100, 2))
    res_last_touch.columns = ['Weightage(%)']
    return res_last_touch
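# A quick sanity check on a toy dataframe (made-up values, not from the
# attribution dataset): the two converting touches are 'Paid Search' and
# 'Facebook', so each should receive a weightage of 50%.
toy = pd.DataFrame({'channel': ['Display', 'Paid Search', 'Facebook'],
                    'conversion': [0, 1, 1]})
last_touch_model(toy, 'conversion', 'channel')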
last_touch=last_touch_model(data, 'conversion', 'channel') #Calling the last touch model function
last_touch
#Plotting the weightage with respect to channels
plt.subplots(figsize=(18, 6))
sns.barplot(y='Weightage(%)', x=last_touch.index, data=last_touch)
plt.show()
# Here's an image for a better understanding of the first-touch attribution model
im = Image.open(requests.get('https://adidodigital.s3.amazonaws.com/Blogs-2020/first-touch.jpg', stream=True).raw)
im
def first_touch_model(dt, conv_col, channel_col, user_id):
    temp = dt.loc[dt[conv_col] == 1]  # Keeping the rows where conversion is 1
    # Grouping by cookie and keeping only the first touch of every cookie, one row per cookie
    # (assumes the rows of each cookie are in chronological order)
    first_touch = dt.groupby(user_id).first()
    cookie_index = temp[user_id].unique()  # The converting cookies, each counted once
    mid_first_touch = first_touch.loc[cookie_index]  # Locating the converting cookies in the first-touch dataframe
    res_first_touch = pd.DataFrame(round(mid_first_touch[channel_col].value_counts(normalize=True) * 100, 2))  # Calculating the weightage
    res_first_touch.columns = ['Weightage(%)']
    return res_first_touch
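# Sanity check on a toy dataframe (made-up values): u1's path is
# Display -> Facebook and u2's is Facebook only, so the first touches are
# Display and Facebook and each channel should get 50%.
toy = pd.DataFrame({'cookie': ['u1', 'u1', 'u2'],
                    'channel': ['Display', 'Facebook', 'Facebook'],
                    'conversion': [0, 1, 1]})
first_touch_model(toy, 'conversion', 'channel', 'cookie')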
first_touch = first_touch_model(data, 'conversion', 'channel', 'cookie')
first_touch
plt.subplots(figsize=(18, 6))
sns.barplot(y='Weightage(%)', x=first_touch.index, data=first_touch)
plt.show()
# Here's an image for a better understanding of the last-non-direct attribution model
im = Image.open(requests.get('https://cloudrock.asia/wp-content/uploads/2016/10/Last-Non-Direct-Click-Attribution-Modelling.png', stream=True).raw)
im
def last_non_direct_model(dt, conv_col, channel_col, user_id):
    slp = pd.DataFrame(dt.groupby(user_id).tail(2))  # Grouping by cookie and keeping the last two observations of each cookie
    temp = slp.loc[slp[conv_col] == 1]
    last_non_direct = slp.groupby(user_id).first()  # The touch just before the last one (or the only touch), one row per cookie
    cookie_index = temp[user_id].unique()
    mid_last_non_direct = last_non_direct.loc[cookie_index]
    res_last_non_direct = pd.DataFrame(round(mid_last_non_direct[channel_col].value_counts(normalize=True) * 100, 2))
    res_last_non_direct.columns = ['Weightage(%)']
    return res_last_non_direct
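# Sanity check on a toy dataframe (made-up values): u1 converts on a direct
# visit, so the credit should go to the previous touch, 'Paid Search'.
toy = pd.DataFrame({'cookie': ['u1', 'u1', 'u1'],
                    'channel': ['Facebook', 'Paid Search', 'Direct'],
                    'conversion': [0, 0, 1]})
last_non_direct_model(toy, 'conversion', 'channel', 'cookie')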
last_non_direct = last_non_direct_model(data, 'conversion', 'channel', 'cookie')
last_non_direct
plt.subplots(figsize=(18, 6))
sns.barplot(y='Weightage(%)', x=last_non_direct.index, data=last_non_direct)
plt.show()
# Here's an image for a better understanding of the linear attribution model
im = Image.open(requests.get('https://www.marketingevolution.com/hs-fs/hubfs/linear%20attribution%20model.png?width=525&name=linear%20attribution%20model.png', stream=True).raw)
im
def linear_model(dt, conv_col, channel_col, user_id):
    # Keeping the data of only those users who convert in the end
    pd.options.mode.chained_assignment = None  # Silencing pandas chained-assignment warnings
    dt = dt.copy()  # Working on a copy so the caller's dataframe is not modified
    temp = dt.loc[dt[conv_col] == 1]
    cookie_index = list(temp[user_id])
    dt['new'] = dt[user_id].isin(cookie_index)
    dt_conv = dt[dt['new']]
    count = Counter(dt_conv[user_id])
    dt_conv['clicks'] = dt_conv[user_id].map(count)  # Click count is the total number of channels visited by a user
    dt_conv = dt_conv.assign(click_per=lambda x: round(100 / x['clicks'], 2))  # Splitting the 100% equally across the touches of each path
    # Getting the mean weightage of every channel
    res_linear = dt_conv.groupby(channel_col, as_index=False)['click_per'].mean()
    total = res_linear['click_per'].sum()
    res_linear['Weightage(%)'] = res_linear.apply(lambda x: round((x['click_per'] / total) * 100, 2), axis=1)
    res_linear.drop(['click_per'], inplace=True, axis=1)
    res_linear = res_linear.set_index(channel_col)
    res_linear.index.name = None
    return res_linear
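# Sanity check on a toy dataframe (made-up values): u1's two touches get 50%
# each and u2's single Facebook touch gets 100%, so the mean weightages are
# Display 50 and Facebook 75, which normalize to 40% and 60%.
toy = pd.DataFrame({'cookie': ['u1', 'u1', 'u2'],
                    'channel': ['Display', 'Facebook', 'Facebook'],
                    'conversion': [0, 1, 1]})
linear_model(toy, 'conversion', 'channel', 'cookie')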
linear = linear_model(data, 'conversion', 'channel', 'cookie') #Calling the linear model function
linear
plt.subplots(figsize=(18, 6))
sns.barplot(y='Weightage(%)', x=linear.index, data=linear)
plt.show()
# Here's an image for a better understanding of the position-based (U-shaped) attribution model
im = Image.open(requests.get('https://www.marketingevolution.com/hs-fs/hubfs/U-shaped%20attribution%20model.png?width=525&name=U-shaped%20attribution%20model.png', stream=True).raw)
im
def calc_attribution(click_pos, total_clicks):
    # Assigning weightage according to the position of the channel
    default_att = 0.5  # If a user has visited only 2 channels, each gets half
    extreme_touch_att_1 = 0.4  # Weightage of the first and the last channel
    intermed_att_1 = 0.2  # Total weightage shared by the remaining channels
    if total_clicks == 2:
        return default_att
    elif total_clicks == 1:
        return 1
    else:
        if click_pos == total_clicks or click_pos == 1:
            return extreme_touch_att_1
        else:
            return intermed_att_1 / (total_clicks - 2)  # Splitting the remaining 20% equally among the middle channels
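# Hedged sanity check: for a four-touch path the position weights should be
# 40% / 10% / 10% / 40%, summing to 1.
print([calc_attribution(pos, 4) for pos in range(1, 5)])  # [0.4, 0.1, 0.1, 0.4]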
def u_shaped_model(dt, conv_col, channel_col, user_id):
    # Keeping the data of only those users who convert in the end
    pd.options.mode.chained_assignment = None  # Ignoring pandas chained-assignment warnings
    dt = dt.copy()  # Working on a copy so the caller's dataframe is not modified
    temp = dt.loc[dt[conv_col] == 1]
    cookie_index = list(temp[user_id])
    dt['new'] = dt[user_id].isin(cookie_index)
    dt_conv = dt[dt['new']]
    count = Counter(dt_conv[user_id])
    dt_conv['clicks'] = dt_conv[user_id].map(count)
    dt_conv['click_pos'] = dt_conv.groupby(user_id).cumcount() + 1  # Ranking the channels of each user_id by position
    dt_Ushaped = dt_conv
    dt_Ushaped['U_Shape'] = dt_conv.apply(lambda val: round(calc_attribution(val.click_pos, val.clicks) * 100, 2), axis=1)
    # Getting the mean weightage of every channel
    res_Ushaped = dt_Ushaped.groupby(channel_col, as_index=False)['U_Shape'].mean()
    total = res_Ushaped['U_Shape'].sum()
    res_Ushaped['Weightage(%)'] = res_Ushaped.apply(lambda x: round((x['U_Shape'] / total) * 100, 2), axis=1)
    res_Ushaped.drop(['U_Shape'], inplace=True, axis=1)
    res_Ushaped = res_Ushaped.set_index(channel_col)
    res_Ushaped.index.name = None
    return res_Ushaped
u_shaped = u_shaped_model(data, 'conversion', 'channel', 'cookie') #Calling u_shaped model function
u_shaped
plt.subplots(figsize=(18, 6))
sns.barplot(y='Weightage(%)', x=u_shaped.index, data=u_shaped)
plt.show()
# Here's an image for a better understanding of the position decay attribution model
im = Image.open(requests.get('https://www.marketingevolution.com/hs-fs/hubfs/time%20decay%20attribution%20model.gif?width=525&name=time%20decay%20attribution%20model.gif', stream=True).raw)
im
def calc_decay_attribution(click_pos, total_clicks):  # Renamed so it does not shadow the U-shaped calc_attribution above
    rel_pos = total_clicks - click_pos
    attribution = pow(2, -rel_pos)  # Weighting each channel by a negative power of 2: the closer to the conversion, the higher the weight
    return attribution
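# Hedged sanity check: for a three-touch path the raw decay weights should be
# 0.25, 0.5 and 1.0, i.e. each step away from the conversion halves the weight.
print([calc_decay_attribution(pos, 3) for pos in range(1, 4)])  # [0.25, 0.5, 1.0]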
def pos_decay_model(dt, conv_col, channel_col, user_id):
    # Keeping the data of only those users who convert in the end
    pd.options.mode.chained_assignment = None
    dt = dt.copy()  # Working on a copy so the caller's dataframe is not modified
    temp = dt.loc[dt[conv_col] == 1]
    cookie_index = list(temp[user_id])
    dt['new'] = dt[user_id].isin(cookie_index)
    dt_conv = dt[dt['new']]
    count = Counter(dt_conv[user_id])
    dt_conv['clicks'] = dt_conv[user_id].map(count)
    dt_conv['click_pos'] = dt_conv.groupby(user_id).cumcount() + 1  # Ranking the channels of each user_id by position
    dt_conv['PosDecay'] = dt_conv.apply(lambda val: calc_decay_attribution(val.click_pos, val.clicks) * 100, axis=1)
    dt_pos_decay = dt_conv
    # Getting the mean weightage of every channel (using channel_col instead of a hard-coded column name)
    res_pos_decay = dt_pos_decay.groupby(channel_col, as_index=False)['PosDecay'].mean()
    total = res_pos_decay['PosDecay'].sum()
    res_pos_decay['Weightage(%)'] = res_pos_decay.apply(lambda x: round((x['PosDecay'] / total) * 100, 2), axis=1)
    res_pos_decay.drop(['PosDecay'], axis=1, inplace=True)
    res_pos_decay = res_pos_decay.set_index(channel_col)
    res_pos_decay.index.name = None
    return res_pos_decay
pos_decay = pos_decay_model(data, 'conversion', 'channel', 'cookie')
pos_decay
plt.subplots(figsize=(18, 6))
sns.barplot(y='Weightage(%)', x=pos_decay.index, data=pos_decay)
plt.show()
# Here's an image for a better understanding of the Markov attribution model
im = Image.open(requests.get('https://media.geeksforgeeks.org/wp-content/uploads/Markov-chain-1.png', stream=True).raw)
im
def transition_states(list_of_paths):
    list_of_unique_channels = set(x for element in list_of_paths for x in element)
    transition_states = {x + '>' + y: 0 for x in list_of_unique_channels for y in list_of_unique_channels}  # Creating every possible transition between two channels
    # Counting the frequency of each transition across all user paths
    for possible_state in list_of_unique_channels:
        if possible_state not in ['Conversion', 'Null']:
            for user_path in list_of_paths:
                if possible_state in user_path:
                    indices = [i for i, s in enumerate(user_path) if possible_state in s]
                    for col in indices:
                        transition_states[user_path[col] + '>' + user_path[col + 1]] += 1
    return transition_states
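# Hedged toy check with two made-up paths: 'Start>A' should be counted twice,
# 'A>Conversion' and 'A>Null' once each.
toy_paths = [['Start', 'A', 'Conversion'], ['Start', 'A', 'Null']]
toy_counts = transition_states(toy_paths)
print({k: v for k, v in toy_counts.items() if v > 0})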
def transition_prob(trans_dict, list_of_paths):
    # Converting the transition counts into probabilities for each pair of channels
    list_of_unique_channels = set(x for element in list_of_paths for x in element)
    trans_prob = defaultdict(dict)
    keys = list(trans_dict)
    for state in list_of_unique_channels:
        if state not in ['Conversion', 'Null']:
            counter = 0
            index = [i for i, s in enumerate(keys) if state + '>' in s]
            for col in index:
                if trans_dict[keys[col]] > 0:
                    counter += trans_dict[keys[col]]
            for col in index:
                if trans_dict[keys[col]] > 0:
                    trans_prob[keys[col]] = trans_dict[keys[col]] / counter
    return trans_prob
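# Continuing the toy example above: every count is divided by the total number
# of transitions out of its origin state, so 'Start>A' should be 1.0 and
# 'A>Conversion' and 'A>Null' should be 0.5 each.
print(dict(transition_prob(toy_counts, toy_paths)))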
def transition_matrix(list_of_paths, transition_probabilities):
    # Building the transition matrix from the probabilities of each transition
    trans_matrix = pd.DataFrame()
    list_of_unique_channels = set(x for element in list_of_paths for x in element)
    for channel in list_of_unique_channels:
        trans_matrix[channel] = 0.00
        trans_matrix.loc[channel] = 0.00
        trans_matrix.loc[channel, channel] = 1.0 if channel in ['Conversion', 'Null'] else 0.0  # Conversion and Null are absorbing states
    for key, value in transition_probabilities.items():
        origin, destination = key.split('>')
        trans_matrix.at[origin, destination] = value
    return trans_matrix
def removal_effects(dt, conversion_rate):
    # Calculating each channel's removal effect: how much the conversion rate
    # would drop if the channel were removed from the transition matrix
    removal_effects_dict = {}
    channels = [channel for channel in dt.columns if channel not in ['Start', 'Null', 'Conversion']]
    for channel in channels:
        removal_dt = dt.drop(channel, axis=1).drop(channel, axis=0)
        for column in removal_dt.columns:
            row_sum = np.sum(list(removal_dt.loc[column]))
            null_pct = float(1) - row_sum
            if null_pct != 0:
                removal_dt.loc[column, 'Null'] = null_pct  # Redirecting the removed channel's probability mass to Null
        removal_dt.loc['Null', 'Null'] = 1.0
        removal_to_conv = removal_dt[['Null', 'Conversion']].drop(['Null', 'Conversion'], axis=0)
        removal_to_non_conv = removal_dt.drop(['Null', 'Conversion'], axis=1).drop(['Null', 'Conversion'], axis=0)
        # Conversion probability of the reduced chain via the fundamental matrix (I - Q)^-1
        removal_inv_diff = np.linalg.inv(np.identity(len(removal_to_non_conv.columns)) - np.asarray(removal_to_non_conv))
        removal_dot_prod = np.dot(removal_inv_diff, np.asarray(removal_to_conv))
        removal_cvr = pd.DataFrame(removal_dot_prod, index=removal_to_conv.index)[[1]].loc['Start'].values[0]  # Column 1 is 'Conversion'
        removal_effect = 1 - removal_cvr / conversion_rate
        removal_effects_dict[channel] = removal_effect
    return removal_effects_dict
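# Hedged illustration of the linear algebra above with made-up numbers: for an
# absorbing chain with transient states {Start, A} and absorbing states
# {Null, Conversion}, the absorption probabilities are (I - Q)^-1 R, where Q
# holds transient-to-transient and R transient-to-absorbing probabilities.
Q = np.array([[0.0, 1.0],    # Start -> A with probability 1
              [0.0, 0.0]])   # A never returns to a transient state
R = np.array([[0.0, 0.0],    # Start never jumps straight to Null or Conversion
              [0.4, 0.6]])   # A -> Null 40%, A -> Conversion 60%
print(np.dot(np.linalg.inv(np.identity(2) - Q), R))  # Row 0: P(Null), P(Conversion) from Start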
def markov_chain_allocations(removal_effects, total_conversions):
    # Distributing the total conversions in proportion to each channel's removal effect
    re_sum = np.sum(list(removal_effects.values()))
    return {k: (v / re_sum) * total_conversions for k, v in removal_effects.items()}
def markov_model(df, conv_col, channel_col, user_id):
    pd.options.mode.chained_assignment = None
    df = df.sort_values(user_id)
    # Collapsing each user's touches into a single path of unique channels
    df_paths = df.groupby(user_id)[channel_col].aggregate(lambda x: x.unique().tolist()).reset_index()
    df_last_interaction = df.drop_duplicates(user_id, keep='last')[[user_id, conv_col]]
    df_paths = pd.merge(df_paths, df_last_interaction, how='left', on=user_id)
    # Wrapping every path with a 'Start' state and a 'Conversion' or 'Null' absorbing state
    df_paths['start'] = [['Start'] for i in range(len(df_paths[conv_col]))]
    df_paths['buff'] = [['Conversion'] for i in range(len(df_paths[conv_col]))]
    df_paths['null'] = [['Null'] for i in range(len(df_paths[conv_col]))]
    df_paths['path'] = np.where(df_paths[conv_col] == 0,
                                df_paths['start'] + df_paths[channel_col] + df_paths['null'],
                                df_paths['start'] + df_paths[channel_col] + df_paths['buff'])
    df_paths = df_paths[[user_id, 'path']]
    list_of_paths = df_paths['path']
    total_conversions = sum(path.count('Conversion') for path in df_paths['path'])
    base_conversion_rate = total_conversions / len(list_of_paths)
    trans_states = transition_states(list_of_paths)
    trans_prob = transition_prob(trans_states, list_of_paths)
    trans_matrix = transition_matrix(list_of_paths, trans_prob)
    removal_effects_dict = removal_effects(trans_matrix, base_conversion_rate)  # Creating a dictionary of the removal effect of each channel
    attributions = markov_chain_allocations(removal_effects_dict, total_conversions)  # Allocating the conversions across channels
    res_markov = pd.DataFrame(attributions.values(), index=attributions.keys())
    res_markov.columns = ['weightage']
    total = res_markov['weightage'].sum()
    res_markov['Weightage(%)'] = res_markov.apply(lambda x: round((x['weightage'] / total) * 100, 2), axis=1)
    res_markov.drop(['weightage'], axis=1, inplace=True)
    res_markov = res_markov.sort_index()
    return res_markov
markov = markov_model(data, 'conversion', 'channel', 'cookie')
markov
plt.subplots(figsize=(18, 6))
sns.barplot(y='Weightage(%)', x=markov.index, data=markov)
plt.show()
def power_set(List):
    # Creating the power set (every non-empty subset) of a given list
    PS = [list(j) for i in range(len(List)) for j in itertools.combinations(List, i + 1)]
    return PS
def subsets(s):
    '''
    This function returns all the possible subsets of a set of channels.
    input:
    - s: a set of channels.
    '''
    if len(s) == 1:
        return s
    else:
        sub_channels = []
        for i in range(1, len(s) + 1):
            sub_channels.extend(map(list, itertools.combinations(s, i)))
        return list(map(",".join, map(sorted, sub_channels)))
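# Hedged example of the helper: the non-empty subsets of {A, B} are A, B and
# A,B, each rendered as an alphabetically sorted comma-joined string.
print(subsets(['A', 'B']))  # ['A', 'B', 'A,B']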
def v_function(A, C_values):
    '''
    This function computes the worth of each coalition.
    inputs:
    - A: a coalition of channels.
    - C_values: a dictionary containing the number of conversions that each subset of channels has yielded.
    '''
    subsets_of_A = subsets(A)
    worth_of_A = 0
    for subset in subsets_of_A:
        if subset in C_values:
            worth_of_A += C_values[subset]
    return worth_of_A
def factorial(n):
    if n == 0:
        return 1
    else:
        return n * factorial(n - 1)
def calculate_shapley(df, col_name):
    '''
    This function returns the Shapley values.
    - df: a dataframe with the two columns ['channels_subset', col_name].
    - col_name: the name of the column with the conversions in the dataframe.
    **Make sure that each value in channels_subset is in alphabetical order: 'Facebook,Paid Search' and
    'Paid Search,Facebook' are the same for this analysis and should be combined under 'Facebook,Paid Search'.
    ***Be careful with the number of distinct channels, because it can significantly slow this function down.
    '''
    c_values = df.set_index("channels_subset").to_dict()[col_name]
    df['channels'] = df['channels_subset'].apply(lambda x: x if len(x.split(",")) == 1 else np.nan)
    channels = list(df['channels'].dropna().unique())
    v_values = {}
    for A in power_set(channels):
        v_values[','.join(sorted(A))] = v_function(A, c_values)
    n = len(channels)
    shapley_values = defaultdict(int)
    for channel in channels:
        for A in v_values.keys():
            if channel not in A.split(","):
                cardinal_A = len(A.split(","))
                A_with_channel = A.split(",")
                A_with_channel.append(channel)
                A_with_channel = ",".join(sorted(A_with_channel))
                # Weight = |S|!(n-|S|-1)!/n!
                weight = factorial(cardinal_A) * factorial(n - cardinal_A - 1) / factorial(n)
                # Marginal contribution = v(S U {i}) - v(S)
                contrib = v_values[A_with_channel] - v_values[A]
                shapley_values[channel] += weight * contrib
        # Adding the term corresponding to the empty coalition: weight 1/n times v({i})
        shapley_values[channel] += v_values[channel] / n
    return shapley_values
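# Hedged sanity check with made-up counts: with c_values A=10, B=20 and A,B=5,
# the worths are v(A)=10, v(B)=20 and v(A,B)=35, and the Shapley values
# (A: 12.5, B: 22.5) sum to the worth of the grand coalition.
toy_subsets = pd.DataFrame({'channels_subset': ['A', 'B', 'A,B'],
                            'conversion_sum': [10, 20, 5]})
print(dict(calculate_shapley(toy_subsets, 'conversion_sum')))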
def shapley_model(df, conv_col, channel_col, user_id):
    # Keeping only the users who convert, so each path counts as one conversion
    # (the original version ignored conv_col and counted every user)
    converted = df.loc[df[conv_col] == 1, user_id].unique()
    df = df[df[user_id].isin(converted)]
    dt_paths = df.sort_values(channel_col).groupby(user_id)[channel_col].aggregate(lambda x: x.unique().tolist()).reset_index()
    dt_paths['channels'] = [str(x) for x in dt_paths[channel_col]]
    channel_count = Counter(dt_paths['channels'])
    channel_ct = pd.DataFrame(channel_count.items())
    channel_ct[0] = channel_ct[0].apply(lambda x: x.replace('[', '').replace(']', '').replace("'", "").replace(", ", ","))
    channel_ct.columns = ['channels_subset', 'conversion_sum']
    attribution = calculate_shapley(channel_ct, 'conversion_sum')
    res_shapley = pd.DataFrame(attribution.values(), index=attribution.keys())
    res_shapley.columns = ['weightage']
    total = res_shapley['weightage'].sum()
    res_shapley['Weightage(%)'] = res_shapley.apply(lambda x: round((x['weightage'] / total) * 100, 2), axis=1)
    res_shapley.drop(['weightage'], axis=1, inplace=True)
    res_shapley = res_shapley.sort_index()
    return res_shapley
shapley=shapley_model(data, 'conversion', 'channel', 'cookie')
shapley
# Plotting the weightage with respect to channels
plt.subplots(figsize=(18, 6))
sns.barplot(y='Weightage(%)', x=shapley.index, data=shapley)
plt.show()
Combined_dataframe = pd.concat([last_touch,first_touch,last_non_direct,linear,u_shaped,pos_decay,markov,shapley],axis=1)
Combined_dataframe.columns=['Last-Touch','First-Touch','Last-Non-direct','Linear','U-shaped','Position Decay','Markov','Shapley']
Combined_dataframe['Mean'] = round(Combined_dataframe.mean(axis=1),2)
Combined_dataframe
# Sorting every model by channel so the bars and the tick labels line up
# (each result frame has its own row order)
channels = sorted(first_touch.index)
X_axis = np.arange(len(channels))
plt.bar(X_axis - 0.2, last_touch['Weightage(%)'].sort_index(), 0.2, label='last touch')
plt.bar(X_axis, first_touch['Weightage(%)'].sort_index(), 0.2, label='first touch')
plt.bar(X_axis + 0.2, last_non_direct['Weightage(%)'].sort_index(), 0.2, label='last non-direct touch')
plt.xticks(X_axis, channels)
plt.legend()
plt.show()
print()
X_axis = np.arange(len(channels))
plt.bar(X_axis - 0.2, linear['Weightage(%)'].sort_index(), 0.2, label='linear model')
plt.bar(X_axis, u_shaped['Weightage(%)'].sort_index(), 0.2, label='U-shaped model')
plt.bar(X_axis + 0.2, pos_decay['Weightage(%)'].sort_index(), 0.2, label='position decay model')
plt.xticks(X_axis, channels)
plt.legend()
plt.show()
print()
X_axis = np.arange(len(channels))
plt.bar(X_axis - 0.2, markov['Weightage(%)'].sort_index(), 0.4, label='Markov-chain model')
plt.bar(X_axis + 0.2, shapley['Weightage(%)'].sort_index(), 0.4, label='Shapley value model')
plt.xticks(X_axis, channels)
plt.legend()
plt.show()
m = GEKKO()
budget = int(input("Enter campaign budget: "))  # Taking the campaign budget as input
# Sorting by channel name so that the coefficients line up with the channel names below
coeff_A = Combined_dataframe['Mean'].sort_index().tolist()
ch_names = sorted(Combined_dataframe.index)
n = len(ch_names)
print()
# One spend variable per channel, with a lower bound of 100 and the total budget as the upper bound
x = [m.Var(lb=100, ub=budget) for _ in range(n)]
for j in range(n):
    print("Channel", j + 1, "should not exceed : ", end='')  # Taking the upper spending bound of each channel as input
    z = int(input())
    m.Equation(x[j] <= z)
m.Equation(m.sum(x) <= budget)  # The total spend must stay within the budget
m.Maximize(m.sum([coeff_A[j] * x[j] for j in range(n)]))  # Maximizing the spend weighted by the mean channel weightage
m.solve(disp=False)
# Printing the optimal budget along with the channel names
print('\n\nBudgets:\n')
for name, var in zip(ch_names, x):
    print(name + ": " + str(round(var.value[0], 0)))