# objectiv-predict-user-behaviour

from bach import display_sql_as_markdown
from modelhub import ModelHub

# instantiate the model hub
# time_aggregation is a strftime-style format string; '%Y-%m-%d' is a
# year-month-day pattern
modelhub = ModelHub(time_aggregation='%Y-%m-%d')

# NOTE(review): `df` is not defined in the visible source; presumably it is
# the Objectiv DataFrame obtained from the model hub in an earlier cell
# -- confirm it is created before this point.

# extract the root location from the location stack
df['root'] = df.location_stack.ls.get_from_context_with_type_series(
    type='RootLocationContext',
    key='id',
)

# root series is later unstacked and its values might contain dashes
# which are not allowed in BigQuery column names, lets replace them
df['root'] = df['root'].str.replace('-', '_')

# only look at press events and count the root locations
# (one count per user_id / root combination)
press_events = df[df.event_type == 'PressEvent']
features = press_events.groupby('user_id').root.value_counts()

# unstack the series, to create a DataFrame with the number of clicks per
# root location as columns; combinations without a count are filled with 0
features_unstacked = features.unstack(fill_value=0)

# take a 10% sample of the feature set to train on, replacing any existing
# sample table with the same name (overwrite=True)
# for BigQuery the table name should be
# 'YOUR_PROJECT.YOUR_WRITABLE_DATASET.YOUR_TABLE_NAME'
features_set_sample = features_unstacked.get_sample(
    'test_lr_sample',
    sample_percentage=10,
    overwrite=True,
)

# the target is whether a user has any count in the 'modeling' column;
# all remaining columns are used as features
y_column = 'modeling'
y = features_set_sample[y_column] > 0
X = features_set_sample.drop(columns=[y_column])

# preview the features
X.head()

# preview the target
y.head()

# get a logistic regression model from the model hub, without an intercept term
lr = modelhub.get_logistic_regression(fit_intercept=False)

# fit the model on the sampled features and target
lr.fit(X, y)

# score the fitted model on the same sample it was trained on
# NOTE(review): presumably mean accuracy, as in scikit-learn -- confirm
lr.score(X, y)

# show the coefficients of the fitted model
lr.coef_

# add the model output for the sample as new columns
features_set_sample['predicted_values'] = lr.predict_proba(X)
features_set_sample['predicted_labels'] = lr.predict(X)

# show the sampled data set, including predictions
features_set_sample.head(20)

# go from the sample back to the unsampled data set
features_set_full = features_set_sample.get_unsampled()

# display the SQL of the unsampled data set as markdown
display_sql_as_markdown(features_set_full)