@@ -30,7 +30,7 @@ def __init__(self):
        self.embedding_layer_inputs = None

    def compute_integrated_gradients(
-        self, model, embedding_layer, inputs, output_index, additional_inputs=None, steps=50
+        self, model, embedding_layer, inputs, output_index, additional_inputs=None, steps=50, batch_size=8
    ):
        import torch
@@ -48,26 +48,32 @@ def compute_integrated_gradients(
            hooks.append(embedding_layer.register_forward_hook(self._embedding_hook))
            model(*all_inputs)
            baselines = np.zeros(self.embeddings.shape)
+            hooks.append(embedding_layer.register_forward_hook(self._embedding_layer_hook))

            # Build the inputs for computing integrated gradient
            alphas = np.linspace(start=0.0, stop=1.0, num=steps, endpoint=True)
-            self.embedding_layer_inputs = torch.tensor(
-                np.stack([baselines[0] + a * (self.embeddings[0] - baselines[0]) for a in alphas]),
-                dtype=torch.get_default_dtype(),
-                device=device,
-                requires_grad=True,
-            )
-            all_inputs = self._repeat(all_inputs, num_reps=self.embedding_layer_inputs.shape[0])
+            gradients = []
+            for k in range(0, len(alphas), batch_size):
+                self.embedding_layer_inputs = torch.tensor(
+                    np.stack([baselines[0] + a * (self.embeddings[0] - baselines[0])
+                              for a in alphas[k:k + batch_size]]),
+                    dtype=torch.get_default_dtype(),
+                    device=device,
+                    requires_grad=True,
+                )
+                repeated_inputs = self._repeat(all_inputs, num_reps=self.embedding_layer_inputs.shape[0])

-            # Compute gradients
-            hooks.append(embedding_layer.register_forward_hook(self._embedding_layer_hook))
-            predictions = model(*all_inputs)
-            if len(predictions.shape) > 1:
-                assert output_index is not None, "The model has multiple outputs, the output index cannot be None"
-                predictions = predictions[:, output_index]
-            gradients = (
-                torch.autograd.grad(torch.unbind(predictions), self.embedding_layer_inputs)[0].detach().cpu().numpy()
-            )
+                # Compute gradients
+                predictions = model(*repeated_inputs)
+                if len(predictions.shape) > 1:
+                    assert output_index is not None, "The model has multiple outputs, the output index cannot be None"
+                    predictions = predictions[:, output_index]
+                grad = (
+                    torch.autograd.grad(
+                        torch.unbind(predictions), self.embedding_layer_inputs)[0].detach().cpu().numpy()
+                )
+                gradients.append(grad)
+            gradients = np.concatenate(gradients, axis=0)
        finally:
            for hook in hooks:
                hook.remove()
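The shape of this change: the old code materialized all `steps` interpolated embeddings in one tensor and ran a single forward/backward pass over them, so peak memory grew linearly with `steps`. The new loop slices `alphas` into chunks of `batch_size`, accumulates the per-chunk gradients, and concatenates at the end; the result is the same, but at most `batch_size` interpolation points are in flight at once. A minimal, self-contained sketch of the same pattern on a toy differentiable function (no hooks or embedding layers; `f`, `x`, and `baseline` are illustrative names, not part of the library):

```python
import numpy as np
import torch

def batched_ig_gradients(f, x, baseline, steps=50, batch_size=8):
    # Gradients of f at `steps` points interpolated between baseline and x,
    # computed in chunks of `batch_size` to bound peak memory.
    alphas = np.linspace(start=0.0, stop=1.0, num=steps, endpoint=True)
    gradients = []
    for k in range(0, len(alphas), batch_size):
        z = torch.tensor(
            np.stack([baseline + a * (x - baseline) for a in alphas[k:k + batch_size]]),
            dtype=torch.get_default_dtype(),
            requires_grad=True,
        )
        out = f(z)  # one scalar per interpolation point, shape (chunk,)
        grad = torch.autograd.grad(torch.unbind(out), z)[0].detach().cpu().numpy()
        gradients.append(grad)
    return np.concatenate(gradients, axis=0)  # shape (steps,) + x.shape

# Example: f sums squared coordinates, so the gradient at each point is 2z.
grads = batched_ig_gradients(
    lambda z: (z ** 2).sum(dim=-1), x=np.ones(4), baseline=np.zeros(4)
)
```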
@@ -90,7 +96,7 @@ def __init__(self):
        self.embedding_layer_inputs = None

    def compute_integrated_gradients(
-        self, model, embedding_layer, inputs, output_index, additional_inputs=None, steps=50
+        self, model, embedding_layer, inputs, output_index, additional_inputs=None, steps=50, batch_size=8
    ):
        import tensorflow as tf
@@ -107,22 +113,28 @@ def compute_integrated_gradients(

            # Build the inputs for computing integrated gradient
            alphas = np.linspace(start=0.0, stop=1.0, num=steps, endpoint=True)
-            self.embedding_layer_inputs = tf.convert_to_tensor(
-                np.stack([baselines[0] + a * (self.embeddings[0] - baselines[0]) for a in alphas]),
-                dtype=tf.keras.backend.floatx(),
-            )
-            all_inputs = [
-                tf.tile(x, (self.embedding_layer_inputs.shape[0],) + (1,) * (len(x.shape) - 1)) for x in all_inputs
-            ]
-
            # Compute gradients
-            with tf.GradientTape() as tape:
-                self._embedding_layer_hook(embedding_layer, tape)
-                predictions = model(*all_inputs)
-                if len(predictions.shape) > 1:
-                    assert output_index is not None, "The model has multiple outputs, the output index cannot be None"
-                    predictions = predictions[:, output_index]
-                gradients = tape.gradient(predictions, embedding_layer.res).numpy()
+            gradients = []
+            for k in range(0, len(alphas), batch_size):
+                with tf.GradientTape() as tape:
+                    self._embedding_layer_hook(embedding_layer, tape)
+                    self.embedding_layer_inputs = tf.convert_to_tensor(
+                        np.stack([baselines[0] + a * (self.embeddings[0] - baselines[0])
+                                  for a in alphas[k:k + batch_size]]),
+                        dtype=tf.keras.backend.floatx(),
+                    )
+                    repeated_inputs = [
+                        tf.tile(x, (self.embedding_layer_inputs.shape[0],) + (1,) * (len(x.shape) - 1))
+                        for x in all_inputs
+                    ]
+                    predictions = model(*repeated_inputs)
+                    if len(predictions.shape) > 1:
+                        assert output_index is not None, \
+                            "The model has multiple outputs, the output index cannot be None"
+                        predictions = predictions[:, output_index]
+                    grad = tape.gradient(predictions, embedding_layer.res).numpy()
+                    gradients.append(grad)
+            gradients = np.concatenate(gradients, axis=0)
        finally:
            self._remove_hook(embedding_layer, original_call)
        return _calculate_integral(self.embeddings[0], baselines[0], gradients)
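`_calculate_integral` itself is untouched by this PR and its body is not shown here. In standard integrated gradients it is the completion step: average the path gradients (typically with the trapezoidal rule) and scale by the input-minus-baseline delta. A sketch of that conventional form, under the assumption that this is what the helper computes, not a copy of the repository's implementation:

```python
import numpy as np

def calculate_integral_sketch(embeddings, baselines, gradients):
    # gradients has shape (steps,) + embeddings.shape, one slice per alpha.
    # Trapezoidal rule: average adjacent steps, then average along the path.
    avg_grads = (gradients[:-1] + gradients[1:]) / 2.0
    avg_grads = avg_grads.mean(axis=0)
    # Scale by the straight-line displacement in embedding space.
    return (embeddings - baselines) * avg_grads
```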
@@ -164,13 +176,13 @@ class IntegratedGradientText(ExplainerBase):
    alias = ["ig", "integrated_gradient"]

    def __init__(
-        self,
-        model,
-        embedding_layer,
-        preprocess_function: Callable,
-        mode: str = "classification",
-        id2token: Dict = None,
-        **kwargs,
+        self,
+        model,
+        embedding_layer,
+        preprocess_function: Callable,
+        mode: str = "classification",
+        id2token: Dict = None,
+        **kwargs,
    ):
        """
        :param model: The model to explain, whose type can be `tf.keras.Model` or `torch.nn.Module`.
@@ -245,6 +257,7 @@ def explain(self, X: Text, y=None, **kwargs) -> WordImportance:
        :return: The explanations for all the instances, e.g., word/token importance scores.
        """
        steps = kwargs.get("steps", 50)
+        batch_size = kwargs.get("batch_size", 16)
        explanations = WordImportance(mode=self.mode)

        inputs = self._preprocess(X)
@@ -275,6 +288,7 @@ def explain(self, X: Text, y=None, **kwargs) -> WordImportance:
            output_index=output_index,
            additional_inputs=None if len(inputs) == 1 else inputs[1:],
            steps=steps,
+            batch_size=batch_size
        )
        tokens = inputs[0].detach().cpu().numpy() if self.model_type == "torch" else inputs[0].numpy()
        explanations.add(
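From the caller's side, the new knob threads through `explain` via `**kwargs` with a default of 16 and on into `compute_integrated_gradients`. A hypothetical usage sketch (the model, embedding-layer attribute, and preprocessing function are placeholders; only the explainer class and the `steps`/`batch_size` parameters come from this PR):

```python
explainer = IntegratedGradientText(
    model=model,                      # tf.keras.Model or torch.nn.Module
    embedding_layer=model.embedding,  # placeholder attribute name
    preprocess_function=preprocess,   # placeholder: raw text -> model inputs
    mode="classification",
    id2token=id2token,
)
# `steps` sets how many interpolation points are used; `batch_size` caps how
# many of them go through the model per forward/backward pass. Smaller values
# trade speed for lower peak memory.
explanations = explainer.explain(X, steps=50, batch_size=16)
```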