import pandas as pd
from xgboost import XGBRegressor, DMatrix
import shap
from sklearn.model_selection import train_test_split
import numpy as np
import transformers
import nlp
import scipy as sp
import logging
from transformers import (
    AutoModelForCausalLM,
    AutoModelWithLMHead,
    AutoTokenizer,
)
import torch
import datasets

# Monkey-patch both libraries so their get_verbosity() always reports
# logging.NOTSET.
# NOTE(review): presumably meant to change how much the libraries log or show
# progress output -- confirm the intent.
transformers.logging.get_verbosity = lambda: logging.NOTSET
datasets.logging.get_verbosity = lambda: logging.NOTSET

Machine Learning Explainability

What are SHAP Values?

How do they do this?

The Shap Library

Example Use-cases

Tabular Data

What makes a good Tinder date?

# Read the profiles CSV with the Python parsing engine.
df = pd.read_csv("/work/okcupid_profiles.csv", engine='python')

# Keep an unmodified copy: the original category labels are looked up from it
# later, after `df` itself has been encoded.
original = df.copy()

df.head()

# Keep only the rows whose income is strictly positive.
df = df.loc[df["income"] > 0]

# Remove columns that are not used as features: the 'last_online' column and
# every column whose name contains 'essay'. DataFrame.drop returns a new frame
# unless inplace=True, so the result has to be assigned back. Dropping the
# essay columns before encoding also avoids encoding columns that are thrown
# away anyway.
df = df.drop(columns=['last_online'])
df = df.drop(columns=[col for col in df.columns if 'essay' in col])

# Replace every non-int64 column with integer category codes.
# (pandas encodes missing values as the code -1.)
for column in df.columns:
    if df[column].dtype != 'int64':
        df[column] = df[column].astype('category').cat.codes

# Fill any remaining missing numeric values with the column's most frequent
# value. Series.mode() returns a Series (there may be ties), and passing a
# Series to fillna aligns on the index instead of filling every gap, so the
# first mode is taken as a scalar.
for col in df.select_dtypes(include=np.number):
    modes = df[col].mode()
    if not modes.empty:
        df[col] = df[col].fillna(modes.iloc[0])

df.head()

# Pull the target column out of the frame (pop removes 'income' from df), then
# use everything that is left as the feature matrix.
y = df.pop('income').copy()
X = df.copy()

# Hold out 20% of the rows for testing; the fixed random_state keeps the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# NOTE(review): `model` is not defined anywhere in the visible source before
# this point. Presumably an XGBRegressor fitted on X_train / y_train in a cell
# that is missing here -- confirm and restore it.
explainer = shap.TreeExplainer(model)

# SHAP values for every row of the test split.
shap_values = explainer.shap_values(X_test)

# Feature values of the test row at position 100 -- the same row that is passed
# to shap.force_plot further down.
X_test.iloc[100]

# Target value (income) of the test row at position 100.
y_test.iloc[100]

import tempfile
from pathlib import Path

from IPython.display import HTML, display

shap.initjs()


def shap_deepnote_show(plot):
    """Display a SHAP plot inline by round-tripping it through an HTML file.

    The plot is written to a temporary HTML file with ``shap.save_html``, the
    file is read back, and its contents are displayed as an ``HTML`` object.

    Args:
        plot: A SHAP plot object accepted by ``shap.save_html`` (for example
            the return value of ``shap.force_plot``).
    """
    # A temporary directory gives a path that is guaranteed to be ours and is
    # cleaned up on exit, instead of reusing the name of an already-discarded
    # NamedTemporaryFile.
    with tempfile.TemporaryDirectory() as tmp_dir:
        html_path = Path(tmp_dir) / "shap_plot.html"
        shap.save_html(str(html_path), plot)
        # read_text opens and closes the file for us (no leaked handle).
        # NOTE(review): assumes shap.save_html writes UTF-8 -- confirm for the
        # installed shap version.
        data = html_path.read_text(encoding="utf-8")
    display(HTML(data))

# Force plot for the test row at position 100: the explainer's expected value,
# that row's SHAP values, and its raw feature values labelled with the column
# names.
shap_deepnote_show(
    shap.force_plot(
        explainer.expected_value,
        shap_values[100],
        X_test.iloc[100].values,
        feature_names=X_test.columns,
    )
)

# Map each integer category code back to its 'religion' label, using the
# unmodified copy of the data (the working frame only holds the codes).
{
    code: label
    for code, label in enumerate(
        original['religion'].astype('category').cat.categories
    )
}

# Mean of the training target; presumably shown for comparison with
# explainer.expected_value in the next cell.
y_train.mean()

# The explainer's expected value -- the same value passed as the first argument
# to shap.force_plot above.
explainer.expected_value

Transformers and Text Generation

# Load the pretrained GPT-2 causal language model and its tokenizer.
# Note: this rebinds `model`, replacing the model used in the tabular section.
model = AutoModelForCausalLM.from_pretrained("gpt2")
tokenizer = AutoTokenizer.from_pretrained("gpt2")

# Mark the model as a decoder.
model.config.is_decoder = True

# Text-generation parameters are stored under task_specific_params.
model.config.task_specific_params["text-generation"] = {
    "do_sample": True,
    "max_length": 50,
    "temperature": 0.4,
    "top_k": 50,
    "no_repeat_ngram_size": 2,
}

# Build a SHAP explainer from the language model and its tokenizer.
# Note: this rebinds `explainer`, replacing the TreeExplainer created earlier.
explainer = shap.Explainer(model, tokenizer)

# One-element list holding the prompt to explain.
prompt = ['Math is probably the most difficult subject in school. The problem is ']

# Apply the explainer to the prompt list; the result is plotted in the next
# cell. Note: this rebinds `shap_values` from the tabular section.
shap_values = explainer(prompt)

# Render the text plot for the explanation computed above.
shap.plots.text(shap_values)

Computer Vision

from tensorflow import keras

# Load the saved Keras model from 'cnn.h5' (presumably a CNN, going by the file
# name). Note: this rebinds `model`, replacing the GPT-2 model loaded earlier.
model = keras.models.load_model('cnn.h5')

# Seed NumPy's global RNG so the background sample below is reproducible.
# (np.random.seed must be *called*; assigning to it replaces the function and
# seeds nothing.)
np.random.seed(356)

# Select a set of 100 background examples (sampled without replacement) to take
# an expectation over.
# NOTE(review): x_train / x_test are not defined in the visible source --
# presumably the image arrays the model was trained and evaluated on; confirm.
background = x_train[np.random.choice(x_train.shape[0], 100, replace=False)]

# Explain the model's predictions on five consecutive test images.
e = shap.DeepExplainer(model, background)
start = 10
shap_values = e.shap_values(x_test[start:start + 5])

# Plot the feature attributions.
shap.image_plot(shap_values, x_test[start:start + 5])

Machine Learning Explainability