This is the main function of the lime package. It is a factory function that returns a new explainer that can be used to explain the predictions made by black box models. It is a generic with methods for the different data types supported by lime.

# S3 method for data.frame
lime(x, model, preprocess = NULL,
  bin_continuous = TRUE, n_bins = 4, quantile_bins = TRUE,
  use_density = TRUE, ...)

# S3 method for character
lime(x, model, preprocess = NULL,
  tokenization = default_tokenize, keep_word_position = FALSE, ...)

# S3 method for imagefile
lime(x, model, preprocess = NULL, ...)

lime(x, model, ...)

Arguments

x

The training data used to fit the model that should be explained.

model

The model whose output should be explained.

preprocess

Function to transform a character vector to the format expected by the model.

bin_continuous

Should continuous variables be binned when making the explanation?

n_bins

The number of bins for continuous variables if bin_continuous = TRUE.

quantile_bins

Should the bins be based on n_bins quantiles or spread evenly over the range of the training data?

use_density

If bin_continuous = FALSE, should continuous data be sampled using kernel density estimation? If not, continuous features are expected to follow a normal distribution.

...

Arguments passed on to methods

tokenization

Function used to tokenize the text for the permutations.

keep_word_position

Set to TRUE to keep the order of words. Warning: each word will be replaced by word_position.

Value

Returns an explainer which can be used together with explain() to explain model predictions.

Examples

# Explaining a model based on tabular data
library(MASS)
iris_test <- iris[1, 1:4]
iris_train <- iris[-1, 1:4]
iris_lab <- iris[[5]][-1]

# Create linear discriminant model on iris data
model <- lda(iris_train, iris_lab)

# Create explanation object
explanation <- lime(iris_train, model)

# This can now be used together with the explain method
explain(iris_test, explanation, n_labels = 1, n_features = 2)
#> # A tibble: 2 x 13
#>   model_type case  label label_prob model_r2 model_intercept model_prediction
#>   <chr>      <chr> <chr>      <dbl>    <dbl>           <dbl>            <dbl>
#> 1 classific… 1     seto…          1    0.532           0.107             1.01
#> 2 classific… 1     seto…          1    0.532           0.107             1.01
#> # … with 6 more variables: feature <chr>, feature_value <dbl>,
#> #   feature_weight <dbl>, feature_desc <chr>, data <list>, prediction <list>
# NOT RUN {
# Explaining a model based on text data

# Purpose is to classify sentences from scientific publications
# and find those where the team writes about their own work
# (category OWNX in the provided dataset).
library(text2vec)
library(xgboost)

data(train_sentences)
data(test_sentences)

get_matrix <- function(text) {
  it <- itoken(text, progressbar = FALSE)
  create_dtm(it, vectorizer = hash_vectorizer())
}

dtm_train = get_matrix(train_sentences$text)

xgb_model <- xgb.train(
  list(
    max_depth = 7,
    eta = 0.1,
    objective = "binary:logistic",
    eval_metric = "error",
    nthread = 1
  ),
  xgb.DMatrix(dtm_train, label = train_sentences$class.text == "OWNX"),
  nrounds = 50
)

sentences <- head(test_sentences[test_sentences$class.text == "OWNX", "text"], 1)
explainer <- lime(train_sentences$text, xgb_model, get_matrix)
explanations <- explain(sentences, explainer, n_labels = 1, n_features = 2)

# We can see that many explanations are based
# on the presence of the word `we` in the sentences
# which makes sense regarding the task.
print(explanations)
# }

# NOT RUN {
library(keras)
library(abind)

# get some image
img_path <- system.file('extdata', 'produce.png', package = 'lime')

# load a predefined image classifier
model <- application_vgg16(
  weights = "imagenet",
  include_top = TRUE
)

# create a function that prepare images for the model
img_preprocess <- function(x) {
  arrays <- lapply(x, function(path) {
    img <- image_load(path, target_size = c(224,224))
    x <- image_to_array(img)
    x <- array_reshape(x, c(1, dim(x)))
    x <- imagenet_preprocess_input(x)
  })
  do.call(abind, c(arrays, list(along = 1)))
}

# Create an explainer (lime recognise the path as an image)
explainer <- lime(img_path, as_classifier(model, unlist(labels)), img_preprocess)

# Explain the model (can take a long time depending on your system)
explanation <- explain(img_path, explainer, n_labels = 2, n_features = 10, n_superpixels = 70)
# }