diff --git a/.github/workflows/black.yml b/.github/workflows/black.yml
new file mode 100644
index 00000000..98b2a668
--- /dev/null
+++ b/.github/workflows/black.yml
@@ -0,0 +1,10 @@
+name: Lint
+
+on: [push, pull_request]
+
+jobs:
+ lint:
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v2
+ - uses: psf/black@stable
\ No newline at end of file
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index e244d4c4..88817b68 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -5,10 +5,16 @@ To contribute to it, please follow guidelines here.
The codebase is hosted on Github at https://github.com/uber/causalml.
-All code need to follow the [PEP8 style guide](https://www.python.org/dev/peps/pep-0008/) with a few exceptions listed in [tox.ini](./tox.ini).
+We use [`black`](https://black.readthedocs.io/en/stable/index.html) as a formatter to keep the coding style and formatting of all Python files consistent and compliant with [PEP8](https://www.python.org/dev/peps/pep-0008/). We recommend that you add `black` to your IDE as a formatter (see the [instructions](https://black.readthedocs.io/en/stable/integrations/editors.html)) or run `black` on the command line before submitting a PR as follows:
+```bash
+# move to the top directory of the causalml repository
+$ cd causalml
+$ pip install -U black
+$ black .
+```
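+
+If you only want to verify formatting without modifying any files, you can run `black` in check mode (add `--diff` to print the changes it would make):
+```bash
+$ black --check --diff .
+```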
-Before contributing, please review outstanding issues.
-If you'd like to contribute to something else, open an issue for discussion first.
+To get started, please check out the outstanding [issues](https://github.com/uber/causalml/issues).
+If you'd like to contribute to something else, open a new issue for discussion first.
## Development Workflow :computer:
diff --git a/causalml/__init__.py b/causalml/__init__.py
index e9480e55..fdc56733 100644
--- a/causalml/__init__.py
+++ b/causalml/__init__.py
@@ -1,10 +1,12 @@
-name = 'causalml'
-__version__ = '0.12.1'
-__all__ = ['dataset',
- 'features',
- 'feature_selection',
- 'inference',
- 'match',
- 'metrics',
- 'optimize',
- 'propensity']
+name = "causalml"
+__version__ = "0.12.1"
+__all__ = [
+ "dataset",
+ "features",
+ "feature_selection",
+ "inference",
+ "match",
+ "metrics",
+ "optimize",
+ "propensity",
+]
diff --git a/causalml/dataset/classification.py b/causalml/dataset/classification.py
index db5f3095..baa9011d 100644
--- a/causalml/dataset/classification.py
+++ b/causalml/dataset/classification.py
@@ -3,21 +3,39 @@
from sklearn.datasets import make_classification
-def make_uplift_classification(n_samples=1000,
- treatment_name=['control', 'treatment1', 'treatment2', 'treatment3'],
- y_name='conversion',
- n_classification_features=10,
- n_classification_informative=5,
- n_classification_redundant=0,
- n_classification_repeated=0,
- n_uplift_increase_dict={'treatment1': 2, 'treatment2': 2, 'treatment3': 2},
- n_uplift_decrease_dict={'treatment1': 0, 'treatment2': 0, 'treatment3': 0},
- delta_uplift_increase_dict={'treatment1': 0.02, 'treatment2': 0.05, 'treatment3': 0.1},
- delta_uplift_decrease_dict={'treatment1': 0., 'treatment2': 0., 'treatment3': 0.},
- n_uplift_increase_mix_informative_dict={'treatment1': 1, 'treatment2': 1, 'treatment3': 1},
- n_uplift_decrease_mix_informative_dict={'treatment1': 0, 'treatment2': 0, 'treatment3': 0},
- positive_class_proportion=0.5,
- random_seed=20190101):
+def make_uplift_classification(
+ n_samples=1000,
+ treatment_name=["control", "treatment1", "treatment2", "treatment3"],
+ y_name="conversion",
+ n_classification_features=10,
+ n_classification_informative=5,
+ n_classification_redundant=0,
+ n_classification_repeated=0,
+ n_uplift_increase_dict={"treatment1": 2, "treatment2": 2, "treatment3": 2},
+ n_uplift_decrease_dict={"treatment1": 0, "treatment2": 0, "treatment3": 0},
+ delta_uplift_increase_dict={
+ "treatment1": 0.02,
+ "treatment2": 0.05,
+ "treatment3": 0.1,
+ },
+ delta_uplift_decrease_dict={
+ "treatment1": 0.0,
+ "treatment2": 0.0,
+ "treatment3": 0.0,
+ },
+ n_uplift_increase_mix_informative_dict={
+ "treatment1": 1,
+ "treatment2": 1,
+ "treatment3": 1,
+ },
+ n_uplift_decrease_mix_informative_dict={
+ "treatment1": 0,
+ "treatment2": 0,
+ "treatment3": 0,
+ },
+ positive_class_proportion=0.5,
+ random_seed=20190101,
+):
"""Generate a synthetic dataset for classification uplift modeling problem.
Parameters
@@ -90,33 +108,44 @@ def make_uplift_classification(n_samples=1000,
for ti in treatment_name:
treatment_list += [ti] * n_samples
treatment_list = np.random.permutation(treatment_list)
- df_res['treatment_group_key'] = treatment_list
+ df_res["treatment_group_key"] = treatment_list
# generate features and labels
- X1, Y1 = make_classification(n_samples=n_all, n_features=n_classification_features,
- n_informative=n_classification_informative, n_redundant=n_classification_redundant,
- n_repeated=n_classification_repeated, n_clusters_per_class=1,
- weights=[1-positive_class_proportion, positive_class_proportion])
+ X1, Y1 = make_classification(
+ n_samples=n_all,
+ n_features=n_classification_features,
+ n_informative=n_classification_informative,
+ n_redundant=n_classification_redundant,
+ n_repeated=n_classification_repeated,
+ n_clusters_per_class=1,
+ weights=[1 - positive_class_proportion, positive_class_proportion],
+ )
x_name = []
x_informative_name = []
for xi in range(n_classification_informative):
- x_name_i = 'x' + str(len(x_name)+1) + '_informative'
+ x_name_i = "x" + str(len(x_name) + 1) + "_informative"
x_name.append(x_name_i)
x_informative_name.append(x_name_i)
df_res[x_name_i] = X1[:, xi]
for xi in range(n_classification_redundant):
- x_name_i = 'x' + str(len(x_name)+1) + '_redundant'
+ x_name_i = "x" + str(len(x_name) + 1) + "_redundant"
x_name.append(x_name_i)
- df_res[x_name_i] = X1[:, n_classification_informative+xi]
+ df_res[x_name_i] = X1[:, n_classification_informative + xi]
for xi in range(n_classification_repeated):
- x_name_i = 'x' + str(len(x_name)+1) + '_repeated'
+ x_name_i = "x" + str(len(x_name) + 1) + "_repeated"
x_name.append(x_name_i)
- df_res[x_name_i] = X1[:, n_classification_informative+n_classification_redundant+xi]
-
- for xi in range(n_classification_features - n_classification_informative - n_classification_redundant
- - n_classification_repeated):
- x_name_i = 'x' + str(len(x_name)+1) + '_irrelevant'
+ df_res[x_name_i] = X1[
+ :, n_classification_informative + n_classification_redundant + xi
+ ]
+
+ for xi in range(
+ n_classification_features
+ - n_classification_informative
+ - n_classification_redundant
+ - n_classification_repeated
+ ):
+ x_name_i = "x" + str(len(x_name) + 1) + "_irrelevant"
x_name.append(x_name_i)
df_res[x_name_i] = np.random.normal(0, 1, n_all)
@@ -127,57 +156,87 @@ def make_uplift_classification(n_samples=1000,
# generate uplift (positive)
for treatment_key_i in treatment_name:
- treatment_index = df_res.index[df_res['treatment_group_key'] == treatment_key_i].tolist()
- if treatment_key_i in n_uplift_increase_dict and n_uplift_increase_dict[treatment_key_i] > 0:
+ treatment_index = df_res.index[
+ df_res["treatment_group_key"] == treatment_key_i
+ ].tolist()
+ if (
+ treatment_key_i in n_uplift_increase_dict
+ and n_uplift_increase_dict[treatment_key_i] > 0
+ ):
x_uplift_increase_name = []
- adjust_class_proportion = (delta_uplift_increase_dict[treatment_key_i]) / (1-positive_class_proportion)
- X_increase, Y_increase = make_classification(n_samples=n_all,
- n_features=n_uplift_increase_dict[treatment_key_i],
- n_informative=n_uplift_increase_dict[treatment_key_i],
- n_redundant=0,
- n_clusters_per_class=1,
- weights=[1-adjust_class_proportion, adjust_class_proportion])
+ adjust_class_proportion = (delta_uplift_increase_dict[treatment_key_i]) / (
+ 1 - positive_class_proportion
+ )
+ X_increase, Y_increase = make_classification(
+ n_samples=n_all,
+ n_features=n_uplift_increase_dict[treatment_key_i],
+ n_informative=n_uplift_increase_dict[treatment_key_i],
+ n_redundant=0,
+ n_clusters_per_class=1,
+ weights=[1 - adjust_class_proportion, adjust_class_proportion],
+ )
for xi in range(n_uplift_increase_dict[treatment_key_i]):
- x_name_i = 'x' + str(len(x_name)+1) + '_uplift_increase'
+ x_name_i = "x" + str(len(x_name) + 1) + "_uplift_increase"
x_name.append(x_name_i)
x_uplift_increase_name.append(x_name_i)
df_res[x_name_i] = X_increase[:, xi]
Y[treatment_index] = Y[treatment_index] + Y_increase[treatment_index]
if n_uplift_increase_mix_informative_dict[treatment_key_i] > 0:
- for xi in range(n_uplift_increase_mix_informative_dict[treatment_key_i]):
- x_name_i = 'x' + str(len(x_name)+1) + '_increase_mix'
+ for xi in range(
+ n_uplift_increase_mix_informative_dict[treatment_key_i]
+ ):
+ x_name_i = "x" + str(len(x_name) + 1) + "_increase_mix"
x_name.append(x_name_i)
- df_res[x_name_i] = (np.random.uniform(-1, 1) * df_res[np.random.choice(x_informative_name)]
- + np.random.uniform(-1, 1) * df_res[np.random.choice(x_uplift_increase_name)])
+ df_res[x_name_i] = (
+ np.random.uniform(-1, 1)
+ * df_res[np.random.choice(x_informative_name)]
+ + np.random.uniform(-1, 1)
+ * df_res[np.random.choice(x_uplift_increase_name)]
+ )
# generate uplift (negative)
for treatment_key_i in treatment_name:
- treatment_index = df_res.index[df_res['treatment_group_key'] == treatment_key_i].tolist()
- if treatment_key_i in n_uplift_decrease_dict and n_uplift_decrease_dict[treatment_key_i] > 0:
+ treatment_index = df_res.index[
+ df_res["treatment_group_key"] == treatment_key_i
+ ].tolist()
+ if (
+ treatment_key_i in n_uplift_decrease_dict
+ and n_uplift_decrease_dict[treatment_key_i] > 0
+ ):
x_uplift_decrease_name = []
- adjust_class_proportion = (delta_uplift_decrease_dict[treatment_key_i]) / (1-positive_class_proportion)
- X_decrease, Y_decrease = make_classification(n_samples=n_all,
- n_features=n_uplift_decrease_dict[treatment_key_i],
- n_informative=n_uplift_decrease_dict[treatment_key_i],
- n_redundant=0,
- n_clusters_per_class=1,
- weights=[1-adjust_class_proportion, adjust_class_proportion])
+ adjust_class_proportion = (delta_uplift_decrease_dict[treatment_key_i]) / (
+ 1 - positive_class_proportion
+ )
+ X_decrease, Y_decrease = make_classification(
+ n_samples=n_all,
+ n_features=n_uplift_decrease_dict[treatment_key_i],
+ n_informative=n_uplift_decrease_dict[treatment_key_i],
+ n_redundant=0,
+ n_clusters_per_class=1,
+ weights=[1 - adjust_class_proportion, adjust_class_proportion],
+ )
for xi in range(n_uplift_decrease_dict[treatment_key_i]):
- x_name_i = 'x' + str(len(x_name)+1) + '_uplift_decrease'
+ x_name_i = "x" + str(len(x_name) + 1) + "_uplift_decrease"
x_name.append(x_name_i)
x_uplift_decrease_name.append(x_name_i)
df_res[x_name_i] = X_decrease[:, xi]
Y[treatment_index] = Y[treatment_index] - Y_decrease[treatment_index]
if n_uplift_decrease_mix_informative_dict[treatment_key_i] > 0:
- for xi in range(n_uplift_decrease_mix_informative_dict[treatment_key_i]):
- x_name_i = 'x' + str(len(x_name)+1) + '_decrease_mix'
+ for xi in range(
+ n_uplift_decrease_mix_informative_dict[treatment_key_i]
+ ):
+ x_name_i = "x" + str(len(x_name) + 1) + "_decrease_mix"
x_name.append(x_name_i)
- df_res[x_name_i] = (np.random.uniform(-1, 1) * df_res[np.random.choice(x_informative_name)]
- + np.random.uniform(-1, 1) * df_res[np.random.choice(x_uplift_decrease_name)])
+ df_res[x_name_i] = (
+ np.random.uniform(-1, 1)
+ * df_res[np.random.choice(x_informative_name)]
+ + np.random.uniform(-1, 1)
+ * df_res[np.random.choice(x_uplift_decrease_name)]
+ )
# truncate Y
Y = np.clip(Y, 0, 1)
df_res[y_name] = Y
- df_res['treatment_effect'] = Y - Y1
+ df_res["treatment_effect"] = Y - Y1
return df_res, x_name
diff --git a/causalml/dataset/regression.py b/causalml/dataset/regression.py
index b574008c..36b70bcb 100644
--- a/causalml/dataset/regression.py
+++ b/causalml/dataset/regression.py
@@ -3,11 +3,11 @@
from scipy.special import expit, logit
-logger = logging.getLogger('causalml')
+logger = logging.getLogger("causalml")
-def synthetic_data(mode=1, n=1000, p=5, sigma=1.0, adj=0.):
- ''' Synthetic data in Nie X. and Wager S. (2018) 'Quasi-Oracle Estimation of Heterogeneous Treatment Effects'
+def synthetic_data(mode=1, n=1000, p=5, sigma=1.0, adj=0.0):
+ """ Synthetic data in Nie X. and Wager S. (2018) 'Quasi-Oracle Estimation of Heterogeneous Treatment Effects'
Args:
mode (int, optional): mode of the simulation: \
@@ -31,20 +31,24 @@ def synthetic_data(mode=1, n=1000, p=5, sigma=1.0, adj=0.):
- tau ((n,)-array): individual treatment effect.
- b ((n,)-array): expected outcome.
- e ((n,)-array): propensity of receiving treatment.
- '''
-
- catalog = {1: simulate_nuisance_and_easy_treatment,
- 2: simulate_randomized_trial,
- 3: simulate_easy_propensity_difficult_baseline,
- 4: simulate_unrelated_treatment_control,
- 5: simulate_hidden_confounder}
-
- assert mode in catalog, 'Invalid mode {}. Should be one of {}'.format(mode, set(catalog))
+ """
+
+ catalog = {
+ 1: simulate_nuisance_and_easy_treatment,
+ 2: simulate_randomized_trial,
+ 3: simulate_easy_propensity_difficult_baseline,
+ 4: simulate_unrelated_treatment_control,
+ 5: simulate_hidden_confounder,
+ }
+
+ assert mode in catalog, "Invalid mode {}. Should be one of {}".format(
+ mode, set(catalog)
+ )
return catalog[mode](n, p, sigma, adj)
-def simulate_nuisance_and_easy_treatment(n=1000, p=5, sigma=1.0, adj=0.):
- ''' Synthetic data with a difficult nuisance components and an easy treatment effect
+def simulate_nuisance_and_easy_treatment(n=1000, p=5, sigma=1.0, adj=0.0):
+ """Synthetic data with a difficult nuisance components and an easy treatment effect
From Setup A in Nie X. and Wager S. (2018) 'Quasi-Oracle Estimation of Heterogeneous Treatment Effects'
Args:
@@ -62,12 +66,20 @@ def simulate_nuisance_and_easy_treatment(n=1000, p=5, sigma=1.0, adj=0.):
- tau ((n,)-array): individual treatment effect.
- b ((n,)-array): expected outcome.
- e ((n,)-array): propensity of receiving treatment.
- '''
-
- X = np.random.uniform(size=n*p).reshape((n, -1))
- b = np.sin(np.pi * X[:, 0] * X[:, 1]) + 2 * (X[:, 2] - 0.5) ** 2 + X[:, 3] + 0.5 * X[:, 4]
+ """
+
+ X = np.random.uniform(size=n * p).reshape((n, -1))
+ b = (
+ np.sin(np.pi * X[:, 0] * X[:, 1])
+ + 2 * (X[:, 2] - 0.5) ** 2
+ + X[:, 3]
+ + 0.5 * X[:, 4]
+ )
eta = 0.1
- e = np.maximum(np.repeat(eta, n), np.minimum(np.sin(np.pi * X[:, 0] * X[:, 1]), np.repeat(1-eta, n)))
+ e = np.maximum(
+ np.repeat(eta, n),
+ np.minimum(np.sin(np.pi * X[:, 0] * X[:, 1]), np.repeat(1 - eta, n)),
+ )
e = expit(logit(e) - adj)
tau = (X[:, 0] + X[:, 1]) / 2
@@ -77,8 +89,8 @@ def simulate_nuisance_and_easy_treatment(n=1000, p=5, sigma=1.0, adj=0.):
return y, X, w, tau, b, e
-def simulate_randomized_trial(n=1000, p=5, sigma=1.0, adj=0.):
- ''' Synthetic data of a randomized trial
+def simulate_randomized_trial(n=1000, p=5, sigma=1.0, adj=0.0):
+ """Synthetic data of a randomized trial
From Setup B in Nie X. and Wager S. (2018) 'Quasi-Oracle Estimation of Heterogeneous Treatment Effects'
Args:
@@ -97,10 +109,12 @@ def simulate_randomized_trial(n=1000, p=5, sigma=1.0, adj=0.):
- tau ((n,)-array): individual treatment effect.
- b ((n,)-array): expected outcome.
- e ((n,)-array): propensity of receiving treatment.
- '''
+ """
- X = np.random.normal(size=n*p).reshape((n, -1))
- b = np.maximum(np.repeat(0.0, n), X[:, 0] + X[:, 1], X[:, 2]) + np.maximum(np.repeat(0.0, n), X[:, 3] + X[:, 4])
+ X = np.random.normal(size=n * p).reshape((n, -1))
+ b = np.maximum(np.repeat(0.0, n), X[:, 0] + X[:, 1], X[:, 2]) + np.maximum(
+ np.repeat(0.0, n), X[:, 3] + X[:, 4]
+ )
e = np.repeat(0.5, n)
tau = X[:, 0] + np.log1p(np.exp(X[:, 1]))
@@ -110,8 +124,8 @@ def simulate_randomized_trial(n=1000, p=5, sigma=1.0, adj=0.):
return y, X, w, tau, b, e
-def simulate_easy_propensity_difficult_baseline(n=1000, p=5, sigma=1.0, adj=0.):
- ''' Synthetic data with easy propensity and a difficult baseline
+def simulate_easy_propensity_difficult_baseline(n=1000, p=5, sigma=1.0, adj=0.0):
+ """Synthetic data with easy propensity and a difficult baseline
From Setup C in Nie X. and Wager S. (2018) 'Quasi-Oracle Estimation of Heterogeneous Treatment Effects'
Args:
@@ -129,11 +143,11 @@ def simulate_easy_propensity_difficult_baseline(n=1000, p=5, sigma=1.0, adj=0.):
- tau ((n,)-array): individual treatment effect.
- b ((n,)-array): expected outcome.
- e ((n,)-array): propensity of receiving treatment.
- '''
+ """
- X = np.random.normal(size=n*p).reshape((n, -1))
+ X = np.random.normal(size=n * p).reshape((n, -1))
b = 2 * np.log1p(np.exp(X[:, 0] + X[:, 1] + X[:, 2]))
- e = 1/(1 + np.exp(X[:, 1] + X[:, 2]))
+ e = 1 / (1 + np.exp(X[:, 1] + X[:, 2]))
tau = np.repeat(1.0, n)
w = np.random.binomial(1, e, size=n)
@@ -142,8 +156,8 @@ def simulate_easy_propensity_difficult_baseline(n=1000, p=5, sigma=1.0, adj=0.):
return y, X, w, tau, b, e
-def simulate_unrelated_treatment_control(n=1000, p=5, sigma=1.0, adj=0.):
- ''' Synthetic data with unrelated treatment and control groups.
+def simulate_unrelated_treatment_control(n=1000, p=5, sigma=1.0, adj=0.0):
+ """Synthetic data with unrelated treatment and control groups.
From Setup D in Nie X. and Wager S. (2018) 'Quasi-Oracle Estimation of Heterogeneous Treatment Effects'
Args:
@@ -161,14 +175,18 @@ def simulate_unrelated_treatment_control(n=1000, p=5, sigma=1.0, adj=0.):
- tau ((n,)-array): individual treatment effect.
- b ((n,)-array): expected outcome.
- e ((n,)-array): propensity of receiving treatment.
- '''
-
- X = np.random.normal(size=n*p).reshape((n, -1))
- b = (np.maximum(np.repeat(0.0, n), X[:, 0] + X[:, 1] + X[:, 2])
- + np.maximum(np.repeat(0.0, n), X[:, 3] + X[:, 4])) / 2
- e = 1/(1 + np.exp(-X[:, 0]) + np.exp(-X[:, 1]))
+ """
+
+ X = np.random.normal(size=n * p).reshape((n, -1))
+ b = (
+ np.maximum(np.repeat(0.0, n), X[:, 0] + X[:, 1] + X[:, 2])
+ + np.maximum(np.repeat(0.0, n), X[:, 3] + X[:, 4])
+ ) / 2
+ e = 1 / (1 + np.exp(-X[:, 0]) + np.exp(-X[:, 1]))
e = expit(logit(e) - adj)
- tau = np.maximum(np.repeat(0.0, n), X[:, 0] + X[:, 1] + X[:, 2]) - np.maximum(np.repeat(0.0, n), X[:, 3] + X[:, 4])
+ tau = np.maximum(np.repeat(0.0, n), X[:, 0] + X[:, 1] + X[:, 2]) - np.maximum(
+ np.repeat(0.0, n), X[:, 3] + X[:, 4]
+ )
w = np.random.binomial(1, e, size=n)
y = b + (w - 0.5) * tau + sigma * np.random.normal(size=n)
@@ -176,8 +194,8 @@ def simulate_unrelated_treatment_control(n=1000, p=5, sigma=1.0, adj=0.):
return y, X, w, tau, b, e
-def simulate_hidden_confounder(n=10000, p=5, sigma=1.0, adj=0.):
- ''' Synthetic dataset with a hidden confounder biasing treatment.
+def simulate_hidden_confounder(n=10000, p=5, sigma=1.0, adj=0.0):
+ """Synthetic dataset with a hidden confounder biasing treatment.
From Louizos et al. (2018) "Causal Effect Inference with Deep Latent-Variable Models"
Args:
@@ -195,7 +213,7 @@ def simulate_hidden_confounder(n=10000, p=5, sigma=1.0, adj=0.):
- tau ((n,)-array): individual treatment effect.
- b ((n,)-array): expected outcome.
- e ((n,)-array): propensity of receiving treatment.
- '''
+ """
z = np.random.binomial(1, 0.5, size=n).astype(np.double)
X = np.random.normal(z, 5 * z + 3 * (1 - z), size=(p, n)).T
e = 0.75 * z + 0.25 * (1 - z)
@@ -204,7 +222,7 @@ def simulate_hidden_confounder(n=10000, p=5, sigma=1.0, adj=0.):
y = np.random.binomial(1, b)
# Compute true ite tau for evaluation (via Monte Carlo approximation).
- t0_t1 = np.array([[0.], [1.]])
+ t0_t1 = np.array([[0.0], [1.0]])
y_t0, y_t1 = expit(3 * (z + 2 * (2 * t0_t1 - 2)))
tau = y_t1 - y_t0
return y, X, w, tau, b, e
diff --git a/causalml/dataset/synthetic.py b/causalml/dataset/synthetic.py
index 347304d6..1d36e2f7 100644
--- a/causalml/dataset/synthetic.py
+++ b/causalml/dataset/synthetic.py
@@ -12,17 +12,22 @@
from scipy.stats import entropy
import warnings
-from causalml.inference.meta import BaseXRegressor, BaseRRegressor, BaseSRegressor, BaseTRegressor
+from causalml.inference.meta import (
+ BaseXRegressor,
+ BaseRRegressor,
+ BaseSRegressor,
+ BaseTRegressor,
+)
from causalml.inference.tree import CausalTreeRegressor
from causalml.propensity import ElasticNetPropensityModel
from causalml.metrics import plot_gain, get_cumgain
-plt.style.use('fivethirtyeight')
-warnings.filterwarnings('ignore')
+plt.style.use("fivethirtyeight")
+warnings.filterwarnings("ignore")
-KEY_GENERATED_DATA = 'generated_data'
-KEY_ACTUAL = 'Actuals'
+KEY_GENERATED_DATA = "generated_data"
+KEY_ACTUAL = "Actuals"
RANDOM_SEED = 42
@@ -42,7 +47,14 @@ def get_synthetic_preds(synthetic_data_func, n=1000, estimators={}):
preds_dict = {}
preds_dict[KEY_ACTUAL] = tau
- preds_dict[KEY_GENERATED_DATA] = {'y': y, 'X': X, 'w': w, 'tau': tau, 'b': b, 'e': e}
+ preds_dict[KEY_GENERATED_DATA] = {
+ "y": y,
+ "X": X,
+ "w": w,
+ "tau": tau,
+ "b": b,
+ "e": e,
+ }
# Predict p_hat because e would not be directly observed in real-life
p_model = ElasticNetPropensityModel()
@@ -51,22 +63,30 @@ def get_synthetic_preds(synthetic_data_func, n=1000, estimators={}):
if estimators:
for name, learner in estimators.items():
try:
- preds_dict[name] = learner.fit_predict(X=X, treatment=w, y=y, p=p_hat).flatten()
+ preds_dict[name] = learner.fit_predict(
+ X=X, treatment=w, y=y, p=p_hat
+ ).flatten()
except TypeError:
preds_dict[name] = learner.fit_predict(X=X, treatment=w, y=y).flatten()
else:
- for base_learner, label_l in zip([BaseSRegressor, BaseTRegressor, BaseXRegressor, BaseRRegressor],
- ['S', 'T', 'X', 'R']):
- for model, label_m in zip([LinearRegression, XGBRegressor], ['LR', 'XGB']):
+ for base_learner, label_l in zip(
+ [BaseSRegressor, BaseTRegressor, BaseXRegressor, BaseRRegressor],
+ ["S", "T", "X", "R"],
+ ):
+ for model, label_m in zip([LinearRegression, XGBRegressor], ["LR", "XGB"]):
learner = base_learner(model())
- model_name = '{} Learner ({})'.format(label_l, label_m)
+ model_name = "{} Learner ({})".format(label_l, label_m)
try:
- preds_dict[model_name] = learner.fit_predict(X=X, treatment=w, y=y, p=p_hat).flatten()
+ preds_dict[model_name] = learner.fit_predict(
+ X=X, treatment=w, y=y, p=p_hat
+ ).flatten()
except TypeError:
- preds_dict[model_name] = learner.fit_predict(X=X, treatment=w, y=y).flatten()
+ preds_dict[model_name] = learner.fit_predict(
+ X=X, treatment=w, y=y
+ ).flatten()
learner = CausalTreeRegressor(random_state=RANDOM_SEED)
- preds_dict['Causal Tree'] = learner.fit_predict(X=X, treatment=w, y=y).flatten()
+ preds_dict["Causal Tree"] = learner.fit_predict(X=X, treatment=w, y=y).flatten()
return preds_dict
@@ -82,14 +102,22 @@ def get_synthetic_summary(synthetic_data_func, n=1000, k=1, estimators={}):
summaries = []
for i in range(k):
- synthetic_preds = get_synthetic_preds(synthetic_data_func, n=n, estimators=estimators)
+ synthetic_preds = get_synthetic_preds(
+ synthetic_data_func, n=n, estimators=estimators
+ )
actuals = synthetic_preds[KEY_ACTUAL]
- synthetic_summary = pd.DataFrame({label: [preds.mean(), mse(preds, actuals)] for label, preds
- in synthetic_preds.items() if label != KEY_GENERATED_DATA},
- index=['ATE', 'MSE']).T
-
- synthetic_summary['Abs % Error of ATE'] = np.abs((synthetic_summary['ATE'] /
- synthetic_summary.loc[KEY_ACTUAL, 'ATE']) - 1)
+ synthetic_summary = pd.DataFrame(
+ {
+ label: [preds.mean(), mse(preds, actuals)]
+ for label, preds in synthetic_preds.items()
+ if label != KEY_GENERATED_DATA
+ },
+ index=["ATE", "MSE"],
+ ).T
+
+ synthetic_summary["Abs % Error of ATE"] = np.abs(
+ (synthetic_summary["ATE"] / synthetic_summary.loc[KEY_ACTUAL, "ATE"]) - 1
+ )
for label in synthetic_summary.index:
stacked_values = np.hstack((synthetic_preds[label], actuals))
@@ -98,17 +126,17 @@ def get_synthetic_summary(synthetic_data_func, n=1000, k=1, estimators={}):
bins = np.linspace(stacked_low, stacked_high, 100)
distr = np.histogram(synthetic_preds[label], bins=bins)[0]
- distr = np.clip(distr/distr.sum(), 0.001, 0.999)
+ distr = np.clip(distr / distr.sum(), 0.001, 0.999)
true_distr = np.histogram(actuals, bins=bins)[0]
- true_distr = np.clip(true_distr/true_distr.sum(), 0.001, 0.999)
+ true_distr = np.clip(true_distr / true_distr.sum(), 0.001, 0.999)
kl = entropy(distr, true_distr)
- synthetic_summary.loc[label, 'KL Divergence'] = kl
+ synthetic_summary.loc[label, "KL Divergence"] = kl
summaries.append(synthetic_summary)
summary = sum(summaries) / k
- return summary[['Abs % Error of ATE', 'MSE', 'KL Divergence']]
+ return summary[["Abs % Error of ATE", "MSE", "KL Divergence"]]
def scatter_plot_summary(synthetic_summary, k, drop_learners=[], drop_cols=[]):
@@ -125,8 +153,8 @@ def scatter_plot_summary(synthetic_summary, k, drop_learners=[], drop_cols=[]):
fig, ax = plt.subplots()
fig.set_size_inches(12, 8)
- xs = plot_data['Abs % Error of ATE']
- ys = plot_data['MSE']
+ xs = plot_data["Abs % Error of ATE"]
+ ys = plot_data["MSE"]
ax.scatter(xs, ys)
@@ -134,14 +162,26 @@ def scatter_plot_summary(synthetic_summary, k, drop_learners=[], drop_cols=[]):
xlim = ax.get_xlim()
for i, txt in enumerate(plot_data.index):
- ax.annotate(txt, (xs[i] - np.random.binomial(1, 0.5)*xlim[1]*0.04, ys[i] - ylim[1]*0.03))
-
- ax.set_xlabel('Abs % Error of ATE')
- ax.set_ylabel('MSE')
- ax.set_title('Learner Performance (averaged over k={} simulations)'.format(k))
-
-
-def bar_plot_summary(synthetic_summary, k, drop_learners=[], drop_cols=[], sort_cols=['MSE', 'Abs % Error of ATE']):
+ ax.annotate(
+ txt,
+ (
+ xs[i] - np.random.binomial(1, 0.5) * xlim[1] * 0.04,
+ ys[i] - ylim[1] * 0.03,
+ ),
+ )
+
+ ax.set_xlabel("Abs % Error of ATE")
+ ax.set_ylabel("MSE")
+ ax.set_title("Learner Performance (averaged over k={} simulations)".format(k))
+
+
+def bar_plot_summary(
+ synthetic_summary,
+ k,
+ drop_learners=[],
+ drop_cols=[],
+ sort_cols=["MSE", "Abs % Error of ATE"],
+):
"""Generates a bar plot comparing learner performance.
Args:
@@ -154,13 +194,21 @@ def bar_plot_summary(synthetic_summary, k, drop_learners=[], drop_cols=[], sort_
plot_data = synthetic_summary.sort_values(sort_cols, ascending=True)
plot_data = plot_data.drop(drop_learners + [KEY_ACTUAL]).drop(drop_cols, axis=1)
- plot_data.plot(kind='bar', figsize=(12, 8))
+ plot_data.plot(kind="bar", figsize=(12, 8))
plt.xticks(rotation=30)
- plt.title('Learner Performance (averaged over k={} simulations)'.format(k))
-
-
-def distr_plot_single_sim(synthetic_preds, kind='kde', drop_learners=[], bins=50, histtype='step', alpha=1, linewidth=1,
- bw_method=1):
+ plt.title("Learner Performance (averaged over k={} simulations)".format(k))
+
+
+def distr_plot_single_sim(
+ synthetic_preds,
+ kind="kde",
+ drop_learners=[],
+ bins=50,
+ histtype="step",
+ alpha=1,
+ linewidth=1,
+ bw_method=1,
+):
"""Plots the distribution of each learner's predictions (for a single simulation).
Kernel Density Estimation (kde) and actual histogram plots supported.
@@ -185,22 +233,46 @@ def distr_plot_single_sim(synthetic_preds, kind='kde', drop_learners=[], bins=50
# Plotting
plt.figure(figsize=(12, 8))
- colors = ['black', 'red', 'blue', 'green', 'cyan', 'brown', 'grey', 'pink', 'orange', 'yellow']
+ colors = [
+ "black",
+ "red",
+ "blue",
+ "green",
+ "cyan",
+ "brown",
+ "grey",
+ "pink",
+ "orange",
+ "yellow",
+ ]
for i, (k, v) in enumerate(preds_for_plot.items()):
if k in learners:
- if kind == 'kde':
+ if kind == "kde":
v = pd.Series(v.flatten())
v = v[v.between(global_lower, global_upper)]
- v.plot(kind='kde', bw_method=bw_method, label=k, linewidth=linewidth, color=colors[i])
- elif kind == 'hist':
- plt.hist(v, bins=np.linspace(global_lower, global_upper, bins), label=k, histtype=histtype,
- alpha=alpha, linewidth=linewidth, color=colors[i])
+ v.plot(
+ kind="kde",
+ bw_method=bw_method,
+ label=k,
+ linewidth=linewidth,
+ color=colors[i],
+ )
+ elif kind == "hist":
+ plt.hist(
+ v,
+ bins=np.linspace(global_lower, global_upper, bins),
+ label=k,
+ histtype=histtype,
+ alpha=alpha,
+ linewidth=linewidth,
+ color=colors[i],
+ )
else:
pass
plt.xlim(global_lower, global_upper)
- plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
- plt.title('Distribution from a Single Simulation')
+ plt.legend(loc="center left", bbox_to_anchor=(1, 0.5))
+ plt.title("Distribution from a Single Simulation")
def scatter_plot_single_sim(synthetic_preds):
@@ -220,18 +292,25 @@ def scatter_plot_single_sim(synthetic_preds):
axes = np.ravel(axes)
for i, (label, preds) in enumerate(preds_for_plot.items()):
- axes[i].scatter(preds_for_plot[KEY_ACTUAL], preds, s=2, label='Predictions')
+ axes[i].scatter(preds_for_plot[KEY_ACTUAL], preds, s=2, label="Predictions")
axes[i].set_title(label, size=12)
- axes[i].set_xlabel('Actual', size=10)
- axes[i].set_ylabel('Prediction', size=10)
+ axes[i].set_xlabel("Actual", size=10)
+ axes[i].set_ylabel("Prediction", size=10)
xlim = axes[i].get_xlim()
ylim = axes[i].get_xlim()
- axes[i].plot([xlim[0], xlim[1]], [ylim[0], ylim[1]], label='Perfect Model', linewidth=1, color='grey')
- axes[i].legend(loc=2, prop={'size': 10})
-
-
-def get_synthetic_preds_holdout(synthetic_data_func, n=1000, valid_size=0.2,
- estimators={}):
+ axes[i].plot(
+ [xlim[0], xlim[1]],
+ [ylim[0], ylim[1]],
+ label="Perfect Model",
+ linewidth=1,
+ color="grey",
+ )
+ axes[i].legend(loc=2, prop={"size": 10})
+
+
+def get_synthetic_preds_holdout(
+ synthetic_data_func, n=1000, valid_size=0.2, estimators={}
+):
"""Generate predictions for synthetic data using specified function (single simulation) for train and holdout
Args:
@@ -248,8 +327,22 @@ def get_synthetic_preds_holdout(synthetic_data_func, n=1000, valid_size=0.2,
"""
y, X, w, tau, b, e = synthetic_data_func(n=n)
- X_train, X_val, y_train, y_val, w_train, w_val, tau_train, tau_val, b_train, b_val, e_train, e_val = \
- train_test_split(X, y, w, tau, b, e, test_size=valid_size, random_state=RANDOM_SEED, shuffle=True)
+ (
+ X_train,
+ X_val,
+ y_train,
+ y_val,
+ w_train,
+ w_val,
+ tau_train,
+ tau_val,
+ b_train,
+ b_val,
+ e_train,
+ e_val,
+ ) = train_test_split(
+ X, y, w, tau, b, e, test_size=valid_size, random_state=RANDOM_SEED, shuffle=True
+ )
preds_dict_train = {}
preds_dict_valid = {}
@@ -257,51 +350,63 @@ def get_synthetic_preds_holdout(synthetic_data_func, n=1000, valid_size=0.2,
preds_dict_train[KEY_ACTUAL] = tau_train
preds_dict_valid[KEY_ACTUAL] = tau_val
- preds_dict_train['generated_data'] = {
- 'y': y_train,
- 'X': X_train,
- 'w': w_train,
- 'tau': tau_train,
- 'b': b_train,
- 'e': e_train}
- preds_dict_valid['generated_data'] = {
- 'y': y_val,
- 'X': X_val,
- 'w': w_val,
- 'tau': tau_val,
- 'b': b_val,
- 'e': e_val}
+ preds_dict_train["generated_data"] = {
+ "y": y_train,
+ "X": X_train,
+ "w": w_train,
+ "tau": tau_train,
+ "b": b_train,
+ "e": e_train,
+ }
+ preds_dict_valid["generated_data"] = {
+ "y": y_val,
+ "X": X_val,
+ "w": w_val,
+ "tau": tau_val,
+ "b": b_val,
+ "e": e_val,
+ }
# Predict p_hat because e would not be directly observed in real-life
p_model = ElasticNetPropensityModel()
p_hat_train = p_model.fit_predict(X_train, w_train)
p_hat_val = p_model.fit_predict(X_val, w_val)
- for base_learner, label_l in zip([BaseSRegressor, BaseTRegressor, BaseXRegressor, BaseRRegressor],
- ['S', 'T', 'X', 'R']):
- for model, label_m in zip([LinearRegression, XGBRegressor], ['LR', 'XGB']):
+ for base_learner, label_l in zip(
+ [BaseSRegressor, BaseTRegressor, BaseXRegressor, BaseRRegressor],
+ ["S", "T", "X", "R"],
+ ):
+ for model, label_m in zip([LinearRegression, XGBRegressor], ["LR", "XGB"]):
# RLearner will need to fit on the p_hat
- if label_l != 'R':
+ if label_l != "R":
learner = base_learner(model())
# fit the model on training data only
learner.fit(X=X_train, treatment=w_train, y=y_train)
try:
- preds_dict_train['{} Learner ({})'.format(
- label_l, label_m)] = learner.predict(X=X_train, p=p_hat_train).flatten()
- preds_dict_valid['{} Learner ({})'.format(
- label_l, label_m)] = learner.predict(X=X_val, p=p_hat_val).flatten()
+ preds_dict_train[
+ "{} Learner ({})".format(label_l, label_m)
+ ] = learner.predict(X=X_train, p=p_hat_train).flatten()
+ preds_dict_valid[
+ "{} Learner ({})".format(label_l, label_m)
+ ] = learner.predict(X=X_val, p=p_hat_val).flatten()
except TypeError:
- preds_dict_train['{} Learner ({})'.format(
- label_l, label_m)] = learner.predict(X=X_train, treatment=w_train, y=y_train).flatten()
- preds_dict_valid['{} Learner ({})'.format(
- label_l, label_m)] = learner.predict(X=X_val, treatment=w_val, y=y_val).flatten()
+ preds_dict_train[
+ "{} Learner ({})".format(label_l, label_m)
+ ] = learner.predict(
+ X=X_train, treatment=w_train, y=y_train
+ ).flatten()
+ preds_dict_valid[
+ "{} Learner ({})".format(label_l, label_m)
+ ] = learner.predict(X=X_val, treatment=w_val, y=y_val).flatten()
else:
learner = base_learner(model())
learner.fit(X=X_train, p=p_hat_train, treatment=w_train, y=y_train)
- preds_dict_train['{} Learner ({})'.format(
- label_l, label_m)] = learner.predict(X=X_train).flatten()
- preds_dict_valid['{} Learner ({})'.format(
- label_l, label_m)] = learner.predict(X=X_val).flatten()
+ preds_dict_train[
+ "{} Learner ({})".format(label_l, label_m)
+ ] = learner.predict(X=X_train).flatten()
+ preds_dict_valid[
+ "{} Learner ({})".format(label_l, label_m)
+ ] = learner.predict(X=X_val).flatten()
return preds_dict_train, preds_dict_valid
@@ -327,23 +432,43 @@ def get_synthetic_summary_holdout(synthetic_data_func, n=1000, valid_size=0.2, k
summaries_validation = []
for i in range(k):
- preds_dict_train, preds_dict_valid = get_synthetic_preds_holdout(synthetic_data_func, n=n,
- valid_size=valid_size)
+ preds_dict_train, preds_dict_valid = get_synthetic_preds_holdout(
+ synthetic_data_func, n=n, valid_size=valid_size
+ )
actuals_train = preds_dict_train[KEY_ACTUAL]
actuals_validation = preds_dict_valid[KEY_ACTUAL]
- synthetic_summary_train = pd.DataFrame({label: [preds.mean(), mse(preds, actuals_train)] for label, preds
- in preds_dict_train.items() if KEY_GENERATED_DATA not in label.lower()},
- index=['ATE', 'MSE']).T
- synthetic_summary_train['Abs % Error of ATE'] = np.abs(
- (synthetic_summary_train['ATE']/synthetic_summary_train.loc[KEY_ACTUAL, 'ATE']) - 1)
-
- synthetic_summary_validation = pd.DataFrame({label: [preds.mean(), mse(preds, actuals_validation)]
- for label, preds in preds_dict_valid.items()
- if KEY_GENERATED_DATA not in label.lower()},
- index=['ATE', 'MSE']).T
- synthetic_summary_validation['Abs % Error of ATE'] = np.abs(
- (synthetic_summary_validation['ATE']/synthetic_summary_validation.loc[KEY_ACTUAL, 'ATE']) - 1)
+ synthetic_summary_train = pd.DataFrame(
+ {
+ label: [preds.mean(), mse(preds, actuals_train)]
+ for label, preds in preds_dict_train.items()
+ if KEY_GENERATED_DATA not in label.lower()
+ },
+ index=["ATE", "MSE"],
+ ).T
+ synthetic_summary_train["Abs % Error of ATE"] = np.abs(
+ (
+ synthetic_summary_train["ATE"]
+ / synthetic_summary_train.loc[KEY_ACTUAL, "ATE"]
+ )
+ - 1
+ )
+
+ synthetic_summary_validation = pd.DataFrame(
+ {
+ label: [preds.mean(), mse(preds, actuals_validation)]
+ for label, preds in preds_dict_valid.items()
+ if KEY_GENERATED_DATA not in label.lower()
+ },
+ index=["ATE", "MSE"],
+ ).T
+ synthetic_summary_validation["Abs % Error of ATE"] = np.abs(
+ (
+ synthetic_summary_validation["ATE"]
+ / synthetic_summary_validation.loc[KEY_ACTUAL, "ATE"]
+ )
+ - 1
+ )
# calculate kl divergence for training
for label in synthetic_summary_train.index:
@@ -353,12 +478,12 @@ def get_synthetic_summary_holdout(synthetic_data_func, n=1000, valid_size=0.2, k
bins = np.linspace(stacked_low, stacked_high, 100)
distr = np.histogram(preds_dict_train[label], bins=bins)[0]
- distr = np.clip(distr/distr.sum(), 0.001, 0.999)
+ distr = np.clip(distr / distr.sum(), 0.001, 0.999)
true_distr = np.histogram(actuals_train, bins=bins)[0]
- true_distr = np.clip(true_distr/true_distr.sum(), 0.001, 0.999)
+ true_distr = np.clip(true_distr / true_distr.sum(), 0.001, 0.999)
kl = entropy(distr, true_distr)
- synthetic_summary_train.loc[label, 'KL Divergence'] = kl
+ synthetic_summary_train.loc[label, "KL Divergence"] = kl
# calculate kl divergence for validation
for label in synthetic_summary_validation.index:
@@ -368,24 +493,32 @@ def get_synthetic_summary_holdout(synthetic_data_func, n=1000, valid_size=0.2, k
bins = np.linspace(stacked_low, stacked_high, 100)
distr = np.histogram(preds_dict_valid[label], bins=bins)[0]
- distr = np.clip(distr/distr.sum(), 0.001, 0.999)
+ distr = np.clip(distr / distr.sum(), 0.001, 0.999)
true_distr = np.histogram(actuals_validation, bins=bins)[0]
- true_distr = np.clip(true_distr/true_distr.sum(), 0.001, 0.999)
+ true_distr = np.clip(true_distr / true_distr.sum(), 0.001, 0.999)
kl = entropy(distr, true_distr)
- synthetic_summary_validation.loc[label, 'KL Divergence'] = kl
+ synthetic_summary_validation.loc[label, "KL Divergence"] = kl
summaries_train.append(synthetic_summary_train)
summaries_validation.append(synthetic_summary_validation)
summary_train = sum(summaries_train) / k
summary_validation = sum(summaries_validation) / k
- return (summary_train[['Abs % Error of ATE', 'MSE', 'KL Divergence']],
- summary_validation[['Abs % Error of ATE', 'MSE', 'KL Divergence']])
-
-
-def scatter_plot_summary_holdout(train_summary, validation_summary, k, label=['Train', 'Validation'], drop_learners=[],
- drop_cols=[]):
+ return (
+ summary_train[["Abs % Error of ATE", "MSE", "KL Divergence"]],
+ summary_validation[["Abs % Error of ATE", "MSE", "KL Divergence"]],
+ )
+
+
+def scatter_plot_summary_holdout(
+ train_summary,
+ validation_summary,
+ k,
+ label=["Train", "Validation"],
+ drop_learners=[],
+ drop_cols=[],
+):
"""Generates a scatter plot comparing learner performance by training and validation.
Args:
@@ -401,15 +534,17 @@ def scatter_plot_summary_holdout(train_summary, validation_summary, k, label=['T
validation_summary = validation_summary.drop(drop_learners).drop(drop_cols, axis=1)
plot_data = pd.concat([train_summary, validation_summary])
- plot_data['label'] = [i.replace('Train', '') for i in plot_data.index]
- plot_data['label'] = [i.replace('Validation', '') for i in plot_data.label]
+ plot_data["label"] = [i.replace("Train", "") for i in plot_data.index]
+ plot_data["label"] = [i.replace("Validation", "") for i in plot_data.label]
fig, ax = plt.subplots()
fig.set_size_inches(12, 8)
- xs = plot_data['Abs % Error of ATE']
- ys = plot_data['MSE']
- group = np.array([label[0]] * train_summary.shape[0] + [label[1]] * validation_summary.shape[0])
- cdict = {label[0]: 'red', label[1]: 'blue'}
+ xs = plot_data["Abs % Error of ATE"]
+ ys = plot_data["MSE"]
+ group = np.array(
+ [label[0]] * train_summary.shape[0] + [label[1]] * validation_summary.shape[0]
+ )
+ cdict = {label[0]: "red", label[1]: "blue"}
for g in np.unique(group):
ix = np.where(group == g)[0].tolist()
@@ -418,14 +553,16 @@ def scatter_plot_summary_holdout(train_summary, validation_summary, k, label=['T
for i, txt in enumerate(plot_data.label[:10]):
ax.annotate(txt, (xs[i] + 0.005, ys[i]))
- ax.set_xlabel('Abs % Error of ATE')
- ax.set_ylabel('MSE')
- ax.set_title('Learner Performance (averaged over k={} simulations)'.format(k))
- ax.legend(loc='center left', bbox_to_anchor=(1.1, 0.5))
+ ax.set_xlabel("Abs % Error of ATE")
+ ax.set_ylabel("MSE")
+ ax.set_title("Learner Performance (averaged over k={} simulations)".format(k))
+ ax.legend(loc="center left", bbox_to_anchor=(1.1, 0.5))
plt.show()
-def bar_plot_summary_holdout(train_summary, validation_summary, k, drop_learners=[], drop_cols=[]):
+def bar_plot_summary_holdout(
+ train_summary, validation_summary, k, drop_learners=[], drop_cols=[]
+):
"""Generates a bar plot comparing learner performance by training and validation
Args:
@@ -437,26 +574,36 @@ def bar_plot_summary_holdout(train_summary, validation_summary, k, drop_learners
drop_cols (list, optional): list of metrics (str) to omit when plotting
"""
train_summary = train_summary.drop([KEY_ACTUAL])
- train_summary['Learner'] = train_summary.index
+ train_summary["Learner"] = train_summary.index
validation_summary = validation_summary.drop([KEY_ACTUAL])
- validation_summary['Learner'] = validation_summary.index
+ validation_summary["Learner"] = validation_summary.index
- for metric in ['Abs % Error of ATE', 'MSE', 'KL Divergence']:
+ for metric in ["Abs % Error of ATE", "MSE", "KL Divergence"]:
plot_data_sub = pd.DataFrame(train_summary.Learner).reset_index(drop=True)
- plot_data_sub['train'] = train_summary[metric].values
- plot_data_sub['validation'] = validation_summary[metric].values
- plot_data_sub = plot_data_sub.set_index('Learner')
+ plot_data_sub["train"] = train_summary[metric].values
+ plot_data_sub["validation"] = validation_summary[metric].values
+ plot_data_sub = plot_data_sub.set_index("Learner")
plot_data_sub = plot_data_sub.drop(drop_learners).drop(drop_cols, axis=1)
- plot_data_sub = plot_data_sub.sort_values('train', ascending=True)
+ plot_data_sub = plot_data_sub.sort_values("train", ascending=True)
- plot_data_sub.plot(kind='bar', color=['red', 'blue'], figsize=(12, 8))
+ plot_data_sub.plot(kind="bar", color=["red", "blue"], figsize=(12, 8))
plt.xticks(rotation=30)
- plt.title('Learner Performance of {} (averaged over k={} simulations)'.format(metric, k))
-
-
-def get_synthetic_auuc(synthetic_preds, drop_learners=[], outcome_col='y', treatment_col='w',
- treatment_effect_col='tau', plot=True):
+ plt.title(
+ "Learner Performance of {} (averaged over k={} simulations)".format(
+ metric, k
+ )
+ )
+
+
+def get_synthetic_auuc(
+ synthetic_preds,
+ drop_learners=[],
+ outcome_col="y",
+ treatment_col="w",
+ treatment_effect_col="tau",
+ plot=True,
+):
"""Get auuc values for cumulative gains of model estimates in quantiles.
For details, reference get_cumgain() and plot_gain()
@@ -476,24 +623,37 @@ def get_synthetic_auuc(synthetic_preds, drop_learners=[], outcome_col='y', treat
synthetic_preds_df = pd.DataFrame(synthetic_preds_df)
synthetic_preds_df = synthetic_preds_df.drop(drop_learners, axis=1)
- synthetic_preds_df['y'] = generated_data[outcome_col]
- synthetic_preds_df['w'] = generated_data[treatment_col]
+ synthetic_preds_df["y"] = generated_data[outcome_col]
+ synthetic_preds_df["w"] = generated_data[treatment_col]
if treatment_effect_col in generated_data.keys():
- synthetic_preds_df['tau'] = generated_data[treatment_effect_col]
-
- assert ((outcome_col in synthetic_preds_df.columns) and
- (treatment_col in synthetic_preds_df.columns) or
- treatment_effect_col in synthetic_preds_df.columns)
-
- cumlift = get_cumgain(synthetic_preds_df, outcome_col='y', treatment_col='w',
- treatment_effect_col='tau')
+ synthetic_preds_df["tau"] = generated_data[treatment_effect_col]
+
+ assert (
+ (outcome_col in synthetic_preds_df.columns)
+ and (treatment_col in synthetic_preds_df.columns)
+ or treatment_effect_col in synthetic_preds_df.columns
+ )
+
+ cumlift = get_cumgain(
+ synthetic_preds_df,
+ outcome_col="y",
+ treatment_col="w",
+ treatment_effect_col="tau",
+ )
auuc_df = pd.DataFrame(cumlift.columns)
- auuc_df.columns = ['Learner']
- auuc_df['cum_gain_auuc'] = [auc(cumlift.index.values/100, cumlift[learner].values) for learner in cumlift.columns]
- auuc_df = auuc_df.sort_values('cum_gain_auuc', ascending=False)
+ auuc_df.columns = ["Learner"]
+ auuc_df["cum_gain_auuc"] = [
+ auc(cumlift.index.values / 100, cumlift[learner].values)
+ for learner in cumlift.columns
+ ]
+ auuc_df = auuc_df.sort_values("cum_gain_auuc", ascending=False)
if plot:
- plot_gain(synthetic_preds_df, outcome_col=outcome_col,
- treatment_col=treatment_col, treatment_effect_col=treatment_effect_col)
+ plot_gain(
+ synthetic_preds_df,
+ outcome_col=outcome_col,
+ treatment_col=treatment_col,
+ treatment_effect_col=treatment_effect_col,
+ )
return auuc_df
diff --git a/causalml/feature_selection/__init__.py b/causalml/feature_selection/__init__.py
index bfda7605..3c8a623c 100644
--- a/causalml/feature_selection/__init__.py
+++ b/causalml/feature_selection/__init__.py
@@ -1 +1 @@
-from .filters import FilterSelect
\ No newline at end of file
+from .filters import FilterSelect
diff --git a/causalml/feature_selection/filters.py b/causalml/feature_selection/filters.py
index 109f4948..81ac94d3 100644
--- a/causalml/feature_selection/filters.py
+++ b/causalml/feature_selection/filters.py
@@ -9,10 +9,10 @@
import statsmodels.api as sm
from scipy import stats
from sklearn.impute import SimpleImputer
-
+
+
class FilterSelect:
- """A class for feature importance methods.
- """
+ """A class for feature importance methods."""
def __init__(self):
return
@@ -35,23 +35,29 @@ def _filter_F_one_feature(data, treatment_indicator, feature_name, y_name):
Y = data[y_name]
X = data[[treatment_indicator, feature_name]]
X = sm.add_constant(X)
- X['{}-{}'.format(treatment_indicator, feature_name)] = X[[treatment_indicator, feature_name]].product(axis=1)
+ X["{}-{}".format(treatment_indicator, feature_name)] = X[
+ [treatment_indicator, feature_name]
+ ].product(axis=1)
model = sm.OLS(Y, X)
result = model.fit()
F_test = result.f_test(np.array([0, 0, 0, 1]))
- F_test_result = pd.DataFrame({
- 'feature': feature_name, # for the interaction, not the main effect
- 'method': 'F-statistic',
- 'score': F_test.fvalue[0][0],
- 'p_value': F_test.pvalue,
- 'misc': 'df_num: {}, df_denom: {}'.format(F_test.df_num, F_test.df_denom),
- }, index=[0]).reset_index(drop=True)
+ F_test_result = pd.DataFrame(
+ {
+ "feature": feature_name, # for the interaction, not the main effect
+ "method": "F-statistic",
+ "score": F_test.fvalue[0][0],
+ "p_value": F_test.pvalue,
+ "misc": "df_num: {}, df_denom: {}".format(
+ F_test.df_num, F_test.df_denom
+ ),
+ },
+ index=[0],
+ ).reset_index(drop=True)
return F_test_result
-
def filter_F(self, data, treatment_indicator, features, y_name):
"""
Rank features based on the F-statistics of the interaction.
@@ -68,19 +74,23 @@ def filter_F(self, data, treatment_indicator, features, y_name):
"""
all_result = pd.DataFrame()
for x_name_i in features:
- one_result = self._filter_F_one_feature(data=data,
- treatment_indicator=treatment_indicator, feature_name=x_name_i, y_name=y_name
+ one_result = self._filter_F_one_feature(
+ data=data,
+ treatment_indicator=treatment_indicator,
+ feature_name=x_name_i,
+ y_name=y_name,
)
all_result = pd.concat([all_result, one_result])
- all_result = all_result.sort_values(by='score', ascending=False)
- all_result['rank'] = all_result['score'].rank(ascending=False)
+ all_result = all_result.sort_values(by="score", ascending=False)
+ all_result["rank"] = all_result["score"].rank(ascending=False)
return all_result
-
@staticmethod
- def _filter_LR_one_feature(data, treatment_indicator, feature_name, y_name, disp=True):
+ def _filter_LR_one_feature(
+ data, treatment_indicator, feature_name, y_name, disp=True
+ ):
"""
Conduct LR (Likelihood Ratio) test of the interaction between treatment and one feature.
@@ -104,7 +114,9 @@ def _filter_LR_one_feature(data, treatment_indicator, feature_name, y_name, disp
# Full model (with interaction)
X_f = X_r.copy()
- X_f['{}-{}'.format(treatment_indicator, feature_name)] = X_f[[treatment_indicator, feature_name]].product(axis=1)
+ X_f["{}-{}".format(treatment_indicator, feature_name)] = X_f[
+ [treatment_indicator, feature_name]
+ ].product(axis=1)
model_f = sm.Logit(Y, X_f)
result_f = model_f.fit(disp=disp)
@@ -112,17 +124,19 @@ def _filter_LR_one_feature(data, treatment_indicator, feature_name, y_name, disp
LR_df = len(result_f.params) - len(result_r.params)
LR_pvalue = 1 - stats.chi2.cdf(LR_stat, df=LR_df)
- LR_test_result = pd.DataFrame({
- 'feature': feature_name, # for the interaction, not the main effect
- 'method': 'LRT-statistic',
- 'score': LR_stat,
- 'p_value': LR_pvalue,
- 'misc': 'df: {}'.format(LR_df),
- }, index=[0]).reset_index(drop=True)
+ LR_test_result = pd.DataFrame(
+ {
+ "feature": feature_name, # for the interaction, not the main effect
+ "method": "LRT-statistic",
+ "score": LR_stat,
+ "p_value": LR_pvalue,
+ "misc": "df: {}".format(LR_df),
+ },
+ index=[0],
+ ).reset_index(drop=True)
return LR_test_result
-
def filter_LR(self, data, treatment_indicator, features, y_name, disp=True):
"""
Rank features based on the LRT-statistics of the interaction.
@@ -139,22 +153,28 @@ def filter_LR(self, data, treatment_indicator, features, y_name, disp=True):
"""
all_result = pd.DataFrame()
for x_name_i in features:
- one_result = self._filter_LR_one_feature(data=data,
- treatment_indicator=treatment_indicator, feature_name=x_name_i, y_name=y_name, disp=disp
+ one_result = self._filter_LR_one_feature(
+ data=data,
+ treatment_indicator=treatment_indicator,
+ feature_name=x_name_i,
+ y_name=y_name,
+ disp=disp,
)
all_result = pd.concat([all_result, one_result])
- all_result = all_result.sort_values(by='score', ascending=False)
- all_result['rank'] = all_result['score'].rank(ascending=False)
+ all_result = all_result.sort_values(by="score", ascending=False)
+ all_result["rank"] = all_result["score"].rank(ascending=False)
return all_result
-
# Get node summary - a function
@staticmethod
- def _GetNodeSummary(data,
- experiment_group_column='treatment_group_key',
- y_name='conversion', smooth=True):
+ def _GetNodeSummary(
+ data,
+ experiment_group_column="treatment_group_key",
+ y_name="conversion",
+ smooth=True,
+ ):
"""
To count the conversions and get the probabilities by treatment groups. This function comes from the uplift tree algorithm, that is used for tree node split evaluation.
@@ -196,9 +216,13 @@ def _GetNodeSummary(data,
results.update({ti: {}})
for ci in y_name_keys:
if smooth:
- results[ti].update({ci: results_series[ti, ci]
- if results_series.index.isin([(ti, ci)]).any()
- else 1})
+ results[ti].update(
+ {
+ ci: results_series[ti, ci]
+ if results_series.index.isin([(ti, ci)]).any()
+ else 1
+ }
+ )
else:
results[ti].update({ci: results_series[ti, ci]})
@@ -206,8 +230,9 @@ def _GetNodeSummary(data,
nodeSummary = {}
for treatment_group_key in results:
n_1 = results[treatment_group_key].get(1, 0)
- n_total = (results[treatment_group_key].get(1, 0)
- + results[treatment_group_key].get(0, 0))
+ n_total = results[treatment_group_key].get(1, 0) + results[
+ treatment_group_key
+ ].get(0, 0)
y_mean = 1.0 * n_1 / n_total
nodeSummary[treatment_group_key] = [y_mean, n_total]
@@ -227,10 +252,10 @@ def _kl_divergence(pk, qk):
qk = 0.1**6
elif qk > 1 - 0.1**6:
qk = 1 - 0.1**6
- S = pk * np.log(pk / qk) + (1-pk) * np.log((1-pk) / (1-qk))
+ S = pk * np.log(pk / qk) + (1 - pk) * np.log((1 - pk) / (1 - qk))
return S
- def _evaluate_KL(self, nodeSummary, control_group='control'):
+ def _evaluate_KL(self, nodeSummary, control_group="control"):
"""
Calculate the multi-treatment unconditional D (one node)
with KL Divergence as split Evaluation function.
@@ -253,7 +278,7 @@ def _evaluate_KL(self, nodeSummary, control_group='control'):
return d_res
@staticmethod
- def _evaluate_ED(nodeSummary, control_group='control'):
+ def _evaluate_ED(nodeSummary, control_group="control"):
"""
Calculate the multi-treatment unconditional D (one node)
with Euclidean Distance as split Evaluation function.
@@ -268,11 +293,11 @@ def _evaluate_ED(nodeSummary, control_group='control'):
d_res = 0
for treatment_group in nodeSummary:
if treatment_group != control_group:
- d_res += 2 * (nodeSummary[treatment_group][0] - pc)**2
+ d_res += 2 * (nodeSummary[treatment_group][0] - pc) ** 2
return d_res
@staticmethod
- def _evaluate_Chi(nodeSummary, control_group='control'):
+ def _evaluate_Chi(nodeSummary, control_group="control"):
"""
Calculate the multi-treatment unconditional D (one node)
with Chi-Square as split Evaluation function.
@@ -287,17 +312,22 @@ def _evaluate_Chi(nodeSummary, control_group='control'):
d_res = 0
for treatment_group in nodeSummary:
if treatment_group != control_group:
- d_res += (
- (nodeSummary[treatment_group][0] - pc)**2 / max(0.1**6, pc)
- + (nodeSummary[treatment_group][0] - pc)**2 / max(0.1**6, 1-pc)
- )
+ d_res += (nodeSummary[treatment_group][0] - pc) ** 2 / max(
+ 0.1**6, pc
+ ) + (nodeSummary[treatment_group][0] - pc) ** 2 / max(0.1**6, 1 - pc)
return d_res
-
- def _filter_D_one_feature(self, data, feature_name, y_name,
- n_bins=10, method='KL', control_group='control',
- experiment_group_column='treatment_group_key',
- null_impute=None):
+ def _filter_D_one_feature(
+ self,
+ data,
+ feature_name,
+ y_name,
+ n_bins=10,
+ method="KL",
+ control_group="control",
+ experiment_group_column="treatment_group_key",
+ null_impute=None,
+ ):
"""
Calculate the chosen divergence measure for one feature.
@@ -322,59 +352,77 @@ def _filter_D_one_feature(self, data, feature_name, y_name,
"""
# [TODO] Application to categorical features
- if method == 'KL':
+ if method == "KL":
evaluationFunction = self._evaluate_KL
- elif method == 'ED':
+ elif method == "ED":
evaluationFunction = self._evaluate_ED
- elif method == 'Chi':
+ elif method == "Chi":
evaluationFunction = self._evaluate_Chi
totalSize = len(data.index)
# impute null if enabled
if null_impute is not None:
- data[feature_name] = SimpleImputer(missing_values=np.nan, strategy=null_impute).fit_transform(data[feature_name].values.reshape(-1, 1))
+ data[feature_name] = SimpleImputer(
+ missing_values=np.nan, strategy=null_impute
+ ).fit_transform(data[feature_name].values.reshape(-1, 1))
elif data[feature_name].isna().any():
- raise Exception("Null value(s) present in column '{}'. Please impute the null value or use null_impute parameter provided!!!".format(feature_name))
+ raise Exception(
+ "Null value(s) present in column '{}'. Please impute the null value or use null_impute parameter provided!!!".format(
+ feature_name
+ )
+ )
# drop duplicate edges in pq.cut result to avoid issues
- x_bin = pd.qcut(data[feature_name].values, n_bins, labels=False,
- duplicates='drop')
+ x_bin = pd.qcut(
+ data[feature_name].values, n_bins, labels=False, duplicates="drop"
+ )
d_children = 0
- for i_bin in range(np.nanmax(x_bin).astype(int) + 1): # range(n_bins):
+ for i_bin in range(np.nanmax(x_bin).astype(int) + 1): # range(n_bins):
nodeSummary = self._GetNodeSummary(
data=data.loc[x_bin == i_bin],
- experiment_group_column=experiment_group_column, y_name=y_name
+ experiment_group_column=experiment_group_column,
+ y_name=y_name,
)[1]
- nodeScore = evaluationFunction(nodeSummary,
- control_group=control_group)
+ nodeScore = evaluationFunction(nodeSummary, control_group=control_group)
nodeSize = sum([x[1] for x in list(nodeSummary.values())])
d_children += nodeScore * nodeSize / totalSize
parentNodeSummary = self._GetNodeSummary(
data=data, experiment_group_column=experiment_group_column, y_name=y_name
)[1]
- d_parent = evaluationFunction(parentNodeSummary,
- control_group=control_group)
+ d_parent = evaluationFunction(parentNodeSummary, control_group=control_group)
d_res = d_children - d_parent
- D_result = pd.DataFrame({
- 'feature': feature_name,
- 'method': method,
- 'score': d_res,
- 'p_value': None,
- 'misc': 'number_of_bins: {}'.format(min(n_bins, np.nanmax(x_bin).astype(int) + 1)),# format(n_bins),
- }, index=[0]).reset_index(drop=True)
-
- return(D_result)
-
- def filter_D(self, data, features, y_name,
- n_bins=10, method='KL', control_group='control',
- experiment_group_column='treatment_group_key',
- null_impute=None):
+ D_result = pd.DataFrame(
+ {
+ "feature": feature_name,
+ "method": method,
+ "score": d_res,
+ "p_value": None,
+ "misc": "number_of_bins: {}".format(
+ min(n_bins, np.nanmax(x_bin).astype(int) + 1)
+ ), # format(n_bins),
+ },
+ index=[0],
+ ).reset_index(drop=True)
+
+ return D_result
+
+ def filter_D(
+ self,
+ data,
+ features,
+ y_name,
+ n_bins=10,
+ method="KL",
+ control_group="control",
+ experiment_group_column="treatment_group_key",
+ null_impute=None,
+ ):
"""
Rank features based on the chosen divergence measure.
@@ -402,25 +450,34 @@ def filter_D(self, data, features, y_name,
for x_name_i in features:
one_result = self._filter_D_one_feature(
- data=data, feature_name=x_name_i, y_name=y_name,
- n_bins=n_bins, method=method, control_group=control_group,
+ data=data,
+ feature_name=x_name_i,
+ y_name=y_name,
+ n_bins=n_bins,
+ method=method,
+ control_group=control_group,
experiment_group_column=experiment_group_column,
- null_impute=null_impute
+ null_impute=null_impute,
)
all_result = pd.concat([all_result, one_result])
- all_result = all_result.sort_values(by='score', ascending=False)
- all_result['rank'] = all_result['score'].rank(ascending=False)
+ all_result = all_result.sort_values(by="score", ascending=False)
+ all_result["rank"] = all_result["score"].rank(ascending=False)
return all_result
- def get_importance(self, data, features, y_name, method,
- experiment_group_column='treatment_group_key',
- control_group = 'control',
- treatment_group = 'treatment',
- n_bins=5,
- null_impute=None
- ):
+ def get_importance(
+ self,
+ data,
+ features,
+ y_name,
+ method,
+ experiment_group_column="treatment_group_key",
+ control_group="control",
+ treatment_group="treatment",
+ n_bins=5,
+ null_impute=None,
+ ):
"""
Rank features based on the chosen statistic of the interaction.
@@ -443,28 +500,47 @@ def get_importance(self, data, features, y_name, method,
all_result : pd.DataFrame
a data frame with following columns: ['method', 'feature', 'rank', 'score', 'p_value', 'misc']
"""
-
- if method == 'F':
- data = data[data[experiment_group_column].isin([control_group, treatment_group])]
- data['treatment_indicator'] = 0
- data.loc[data[experiment_group_column]==treatment_group,'treatment_indicator'] = 1
- all_result = self.filter_F(data=data,
- treatment_indicator='treatment_indicator', features=features, y_name=y_name
+
+ if method == "F":
+ data = data[
+ data[experiment_group_column].isin([control_group, treatment_group])
+ ]
+ data["treatment_indicator"] = 0
+ data.loc[
+ data[experiment_group_column] == treatment_group, "treatment_indicator"
+ ] = 1
+ all_result = self.filter_F(
+ data=data,
+ treatment_indicator="treatment_indicator",
+ features=features,
+ y_name=y_name,
)
- elif method == 'LR':
- data = data[data[experiment_group_column].isin([control_group, treatment_group])]
- data['treatment_indicator'] = 0
- data.loc[data[experiment_group_column]==treatment_group,'treatment_indicator'] = 1
- all_result = self.filter_LR(data=data, disp=True,
- treatment_indicator='treatment_indicator', features=features, y_name=y_name
+ elif method == "LR":
+ data = data[
+ data[experiment_group_column].isin([control_group, treatment_group])
+ ]
+ data["treatment_indicator"] = 0
+ data.loc[
+ data[experiment_group_column] == treatment_group, "treatment_indicator"
+ ] = 1
+ all_result = self.filter_LR(
+ data=data,
+ disp=True,
+ treatment_indicator="treatment_indicator",
+ features=features,
+ y_name=y_name,
)
else:
- all_result = self.filter_D(data=data, method=method,
- features=features, y_name=y_name,
- n_bins=n_bins, control_group=control_group,
- experiment_group_column=experiment_group_column,
- null_impute=null_impute
+ all_result = self.filter_D(
+ data=data,
+ method=method,
+ features=features,
+ y_name=y_name,
+ n_bins=n_bins,
+ control_group=control_group,
+ experiment_group_column=experiment_group_column,
+ null_impute=null_impute,
)
-
- all_result['method'] = method + ' filter'
- return all_result[['method', 'feature', 'rank', 'score', 'p_value', 'misc']]
+
+ all_result["method"] = method + " filter"
+ return all_result[["method", "feature", "rank", "score", "p_value", "misc"]]
diff --git a/causalml/features.py b/causalml/features.py
index 15eec4d8..91471216 100644
--- a/causalml/features.py
+++ b/causalml/features.py
@@ -5,10 +5,10 @@
from sklearn import base
-logger = logging.getLogger('causalml')
+logger = logging.getLogger("causalml")
-NAN_INT = -98765 # A random integer to impute missing values with
+NAN_INT = -98765 # A random integer to impute missing values with
class LabelEncoder(base.BaseEstimator):
@@ -32,7 +32,7 @@ def __init__(self, min_obs=10):
self.min_obs = min_obs
def __repr__(self):
- return ('LabelEncoder(min_obs={})').format(self.min_obs)
+ return ("LabelEncoder(min_obs={})").format(self.min_obs)
def _get_label_encoder_and_max(self, x):
"""Return a mapping from values and its maximum of a column to integer labels.
@@ -57,7 +57,9 @@ def _get_label_encoder_and_max(self, x):
# that appear less than min_obs.
offset = 0 if n_uniq == n_uniq_new else 1
- label_encoder = pd.Series(np.arange(n_uniq_new) + offset, index=label_count.index)
+ label_encoder = pd.Series(
+ np.arange(n_uniq_new) + offset, index=label_count.index
+ )
max_label = label_encoder.max()
label_encoder = label_encoder.to_dict()
@@ -80,8 +82,10 @@ def fit(self, X, y=None):
self.label_maxes = [None] * X.shape[1]
for i, col in enumerate(X.columns):
- self.label_encoders[i], self.label_maxes[i] = \
- self._get_label_encoder_and_max(X[col])
+ (
+ self.label_encoders[i],
+ self.label_maxes[i],
+ ) = self._get_label_encoder_and_max(X[col])
return self
@@ -114,8 +118,10 @@ def fit_transform(self, X, y=None):
self.label_maxes = [None] * X.shape[1]
for i, col in enumerate(X.columns):
- self.label_encoders[i], self.label_maxes[i] = \
- self._get_label_encoder_and_max(X[col])
+ (
+ self.label_encoders[i],
+ self.label_maxes[i],
+ ) = self._get_label_encoder_and_max(X[col])
X.loc[:, col] = X[col].fillna(NAN_INT).map(self.label_encoders[i]).fillna(0)
@@ -144,7 +150,7 @@ def __init__(self, min_obs=10):
self.label_encoder = LabelEncoder(min_obs)
def __repr__(self):
- return ('OneHotEncoder(min_obs={})').format(self.min_obs)
+ return ("OneHotEncoder(min_obs={})").format(self.min_obs)
def _transform_col(self, x, i):
"""Encode one categorical column into sparse matrix with one-hot-encoding.
@@ -167,8 +173,9 @@ def _transform_col(self, x, i):
j = labels[labels > 0] - 1 # column index starts from 0
if len(i) > 0:
- return sparse.coo_matrix((np.ones_like(i), (i, j)),
- shape=(x.shape[0], label_max))
+ return sparse.coo_matrix(
+ (np.ones_like(i), (i, j)), shape=(x.shape[0], label_max)
+ )
else:
# if there is no non-zero value, return no matrix
return None
@@ -197,8 +204,8 @@ def transform(self, X):
else:
X_new = sparse.hstack((X_new, X_col))
- logger.debug('{} --> {} features'.format(
- col, self.label_encoder.label_maxes[i])
+ logger.debug(
+ "{} --> {} features".format(col, self.label_encoder.label_maxes[i])
)
return X_new
@@ -236,13 +243,13 @@ def load_data(data, features, transformations={}):
df.loc[:, bool_cols] = df[bool_cols].astype(np.int8)
for col, transformation in transformations.items():
- logger.info('Applying {} to {}'.format(transformation.__name__, col))
+ logger.info("Applying {} to {}".format(transformation.__name__, col))
df[col] = df[col].apply(transformation)
cat_cols = [col for col in features if df[col].dtype == np.object]
num_cols = [col for col in features if col not in cat_cols]
- logger.info('Applying one-hot-encoding to {}'.format(cat_cols))
+ logger.info("Applying one-hot-encoding to {}".format(cat_cols))
ohe = OneHotEncoder(min_obs=df.shape[0] * 0.001)
X_cat = ohe.fit_transform(df[cat_cols]).todense()
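The `min_obs` argument threaded through `LabelEncoder` and `OneHotEncoder` above groups categories seen fewer than `min_obs` times before encoding, which is what `load_data` relies on. A small usage sketch (the toy frame and threshold are made up; the classes live in `causalml/features.py` as shown in this hunk):

```python
import pandas as pd
from causalml.features import LabelEncoder, OneHotEncoder

df = pd.DataFrame({
    "city": ["sf", "sf", "nyc", "nyc", "la", "sea"],   # "la" and "sea" occur only once
    "tier": ["a", "b", "a", "b", "a", "b"],
})

# Categories observed fewer than min_obs times are collapsed before encoding.
le = LabelEncoder(min_obs=2)
le.fit(df[["city", "tier"]])
print(le.label_encoders)   # per-column value -> integer maps; rare values are omitted

ohe = OneHotEncoder(min_obs=2)
X_sparse = ohe.fit_transform(df[["city", "tier"]])   # scipy sparse matrix
X_dense = X_sparse.todense()                         # same call pattern as load_data above
```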
diff --git a/causalml/inference/iv/drivlearner.py b/causalml/inference/iv/drivlearner.py
index 2ae133a0..8c79ea75 100644
--- a/causalml/inference/iv/drivlearner.py
+++ b/causalml/inference/iv/drivlearner.py
@@ -265,7 +265,7 @@ def fit(
- p_0_filt
)
dr /= weight
- self.models_tau[group][ifold].fit(X_filt, dr, sample_weight=weight ** 2)
+ self.models_tau[group][ifold].fit(X_filt, dr, sample_weight=weight**2)
def predict(self, X, treatment=None, y=None, return_components=False, verbose=True):
"""Predict treatment effects.
@@ -524,14 +524,14 @@ def estimate_ate(
part_1 = (
(y_filt_1 - yhat_1).var()
- + _ate ** 2 * (treatment_filt_1 - prob_treatment_1).var()
+ + _ate**2 * (treatment_filt_1 - prob_treatment_1).var()
- 2
* _ate
* (y_filt_1 * treatment_filt_1 - yhat_1 * prob_treatment_1).mean()
)
part_0 = (
(y_filt_0 - yhat_0).var()
- + _ate ** 2 * (treatment_filt_0 - prob_treatment_0).var()
+ + _ate**2 * (treatment_filt_0 - prob_treatment_0).var()
- 2
* _ate
* (y_filt_0 * treatment_filt_0 - yhat_0 * prob_treatment_0).mean()
diff --git a/causalml/inference/iv/iv_regression.py b/causalml/inference/iv/iv_regression.py
index 0cfa71ef..02cf12e2 100644
--- a/causalml/inference/iv/iv_regression.py
+++ b/causalml/inference/iv/iv_regression.py
@@ -7,27 +7,27 @@
class IVRegressor(object):
- ''' A wrapper class that uses IV2SLS from statsmodel
+ """A wrapper class that uses IV2SLS from statsmodel
A linear 2SLS model that estimates the average treatment effect with endogenous treatment variable.
- '''
+ """
def __init__(self):
- '''
+ """
Initializes the class.
- '''
+ """
- self.method = '2SLS'
+ self.method = "2SLS"
def fit(self, X, treatment, y, w):
- ''' Fits the 2SLS model.
+ """Fits the 2SLS model.
Args:
X (np.matrix or np.array or pd.Dataframe): a feature matrix
treatment (np.array or pd.Series): a treatment vector
y (np.array or pd.Series): an outcome vector
w (np.array or pd.Series): an instrument vector
- '''
+ """
X, treatment, y, w = convert_pd_to_np(X, treatment, y, w)
@@ -39,11 +39,11 @@ def fit(self, X, treatment, y, w):
self.iv_fit = self.iv_model.fit()
def predict(self):
- '''Returns the average treatment effect and its estimated standard error
+ """Returns the average treatment effect and its estimated standard error
Returns:
(float): average treatment effect
(float): standard error of the estimation
- '''
+ """
- return self.iv_fit.params[-1], self.iv_fit.bse[-1]
\ No newline at end of file
+ return self.iv_fit.params[-1], self.iv_fit.bse[-1]
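As background for the docstring cleanup above: `IVRegressor` wraps statsmodels' `IV2SLS`; `fit` takes features, an endogenous treatment, an outcome, and an instrument, and `predict` returns the 2SLS ATE estimate with its standard error. A hedged usage sketch on synthetic data (the data-generating process below is invented for illustration):

```python
import numpy as np
from causalml.inference.iv.iv_regression import IVRegressor

rng = np.random.default_rng(42)
n = 10_000
X = rng.normal(size=(n, 3))
z = rng.binomial(1, 0.5, size=n)                  # instrument
u = rng.normal(size=n)                            # unobserved confounder
treatment = (0.5 * z + 0.5 * u + rng.normal(size=n) > 0.5).astype(int)
y = X @ np.array([0.1, 0.2, 0.3]) + 1.0 * treatment + u + rng.normal(size=n)

iv = IVRegressor()
iv.fit(X, treatment, y, z)   # the w argument is the instrument vector
ate, se = iv.predict()       # point estimate and standard error from 2SLS
```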
diff --git a/causalml/inference/meta/__init__.py b/causalml/inference/meta/__init__.py
index 119824f5..1ddf1ced 100644
--- a/causalml/inference/meta/__init__.py
+++ b/causalml/inference/meta/__init__.py
@@ -1,5 +1,11 @@
from .slearner import LRSRegressor, BaseSLearner, BaseSRegressor, BaseSClassifier
-from .tlearner import XGBTRegressor, MLPTRegressor, BaseTLearner, BaseTRegressor, BaseTClassifier
+from .tlearner import (
+ XGBTRegressor,
+ MLPTRegressor,
+ BaseTLearner,
+ BaseTRegressor,
+ BaseTClassifier,
+)
from .xlearner import BaseXLearner, BaseXRegressor, BaseXClassifier
from .rlearner import BaseRLearner, BaseRRegressor, BaseRClassifier, XGBRRegressor
from .tmle import TMLELearner
diff --git a/causalml/inference/meta/base.py b/causalml/inference/meta/base.py
index 87af438c..ea6b0ded 100644
--- a/causalml/inference/meta/base.py
+++ b/causalml/inference/meta/base.py
@@ -8,26 +8,46 @@
from causalml.propensity import compute_propensity_score
-logger = logging.getLogger('causalml')
+logger = logging.getLogger("causalml")
class BaseLearner(metaclass=ABCMeta):
-
@abstractclassmethod
def fit(self, X, treatment, y, p=None):
pass
@abstractclassmethod
- def predict(self, X, treatment=None, y=None, p=None, return_components=False, verbose=True):
+ def predict(
+ self, X, treatment=None, y=None, p=None, return_components=False, verbose=True
+ ):
pass
- def fit_predict(self, X, treatment, y, p=None, return_ci=False, n_bootstraps=1000, bootstrap_size=10000,
- return_components=False, verbose=True):
+ def fit_predict(
+ self,
+ X,
+ treatment,
+ y,
+ p=None,
+ return_ci=False,
+ n_bootstraps=1000,
+ bootstrap_size=10000,
+ return_components=False,
+ verbose=True,
+ ):
self.fit(X, treatment, y, p)
return self.predict(X, treatment, y, p, return_components, verbose)
@abstractclassmethod
- def estimate_ate(self, X, treatment, y, p=None, bootstrap_ci=False, n_bootstraps=1000, bootstrap_size=10000):
+ def estimate_ate(
+ self,
+ X,
+ treatment,
+ y,
+ p=None,
+ bootstrap_ci=False,
+ n_bootstraps=1000,
+ bootstrap_size=10000,
+ ):
pass
def bootstrap(self, X, treatment, y, p=None, size=10000):
@@ -62,7 +82,9 @@ def _format_p(p, t_groups):
treatment_name = t_groups[0]
p = {treatment_name: convert_pd_to_np(p)}
elif isinstance(p, dict):
- p = {treatment_name: convert_pd_to_np(_p) for treatment_name, _p in p.items()}
+ p = {
+ treatment_name: convert_pd_to_np(_p) for treatment_name, _p in p.items()
+ }
return p
@@ -80,7 +102,7 @@ def _set_propensity_models(self, X, treatment, y):
treatment (np.array or pd.Series): a treatment vector
y (np.array or pd.Series): an outcome vector
"""
- logger.info('Generating propensity score')
+ logger.info("Generating propensity score")
p = dict()
p_model = dict()
for group in self.t_groups:
@@ -89,15 +111,28 @@ def _set_propensity_models(self, X, treatment, y):
X_filt = X[mask]
w_filt = (treatment_filt == group).astype(int)
w = (treatment == group).astype(int)
- propensity_model = self.model_p if hasattr(self, 'model_p') else None
- p[group], p_model[group] = compute_propensity_score(X=X_filt, treatment=w_filt,
- p_model=propensity_model,
- X_pred=X, treatment_pred=w)
+ propensity_model = self.model_p if hasattr(self, "model_p") else None
+ p[group], p_model[group] = compute_propensity_score(
+ X=X_filt,
+ treatment=w_filt,
+ p_model=propensity_model,
+ X_pred=X,
+ treatment_pred=w,
+ )
self.propensity_model = p_model
self.propensity = p
- def get_importance(self, X=None, tau=None, model_tau_feature=None, features=None, method='auto', normalize=True,
- test_size=0.3, random_state=None):
+ def get_importance(
+ self,
+ X=None,
+ tau=None,
+ model_tau_feature=None,
+ features=None,
+ method="auto",
+ normalize=True,
+ test_size=0.3,
+ random_state=None,
+ ):
"""
Builds a model (using X to predict estimated/actual tau), and then calculates feature importances
based on a specified method.
@@ -123,10 +158,18 @@ def get_importance(self, X=None, tau=None, model_tau_feature=None, features=None
permutation importance)
random_state (int/RandomState instance/None): random state used in permutation importance estimation
"""
- explainer = Explainer(method=method, control_name=self.control_name,
- X=X, tau=tau, model_tau=model_tau_feature,
- features=features, classes=self._classes, normalize=normalize,
- test_size=test_size, random_state=random_state)
+ explainer = Explainer(
+ method=method,
+ control_name=self.control_name,
+ X=X,
+ tau=tau,
+ model_tau=model_tau_feature,
+ features=features,
+ classes=self._classes,
+ normalize=normalize,
+ test_size=test_size,
+ random_state=random_state,
+ )
return explainer.get_importance()
def get_shap_values(self, X=None, model_tau_feature=None, tau=None, features=None):
@@ -138,13 +181,28 @@ def get_shap_values(self, X=None, model_tau_feature=None, tau=None, features=Non
model_tau_feature (sklearn/lightgbm/xgboost model object): an unfitted model object
features (optional, np.array): list/array of feature names. If None, an enumerated list will be used.
"""
- explainer = Explainer(method='shapley', control_name=self.control_name,
- X=X, tau=tau, model_tau=model_tau_feature,
- features=features, classes=self._classes)
+ explainer = Explainer(
+ method="shapley",
+ control_name=self.control_name,
+ X=X,
+ tau=tau,
+ model_tau=model_tau_feature,
+ features=features,
+ classes=self._classes,
+ )
return explainer.get_shap_values()
- def plot_importance(self, X=None, tau=None, model_tau_feature=None, features=None, method='auto', normalize=True,
- test_size=0.3, random_state=None):
+ def plot_importance(
+ self,
+ X=None,
+ tau=None,
+ model_tau_feature=None,
+ features=None,
+ method="auto",
+ normalize=True,
+ test_size=0.3,
+ random_state=None,
+ ):
"""
Builds a model (using X to predict estimated/actual tau), and then plots feature importances
based on a specified method.
@@ -170,13 +228,29 @@ def plot_importance(self, X=None, tau=None, model_tau_feature=None, features=Non
permutation importance)
random_state (int/RandomState instance/None): random state used in permutation importance estimation
"""
- explainer = Explainer(method=method, control_name=self.control_name,
- X=X, tau=tau, model_tau=model_tau_feature,
- features=features, classes=self._classes, normalize=normalize,
- test_size=test_size, random_state=random_state)
+ explainer = Explainer(
+ method=method,
+ control_name=self.control_name,
+ X=X,
+ tau=tau,
+ model_tau=model_tau_feature,
+ features=features,
+ classes=self._classes,
+ normalize=normalize,
+ test_size=test_size,
+ random_state=random_state,
+ )
explainer.plot_importance()
- def plot_shap_values(self, X=None, tau=None, model_tau_feature=None, features=None, shap_dict=None, **kwargs):
+ def plot_shap_values(
+ self,
+ X=None,
+ tau=None,
+ model_tau_feature=None,
+ features=None,
+ shap_dict=None,
+ **kwargs
+ ):
"""
Plots distribution of shapley values.
@@ -192,13 +266,30 @@ def plot_shap_values(self, X=None, tau=None, model_tau_feature=None, features=No
shap_dict (optional, dict): a dict of shapley value matrices. If None, shap_dict will be computed.
"""
override_checks = False if shap_dict is None else True
- explainer = Explainer(method='shapley', control_name=self.control_name,
- X=X, tau=tau, model_tau=model_tau_feature,
- features=features, override_checks=override_checks, classes=self._classes)
+ explainer = Explainer(
+ method="shapley",
+ control_name=self.control_name,
+ X=X,
+ tau=tau,
+ model_tau=model_tau_feature,
+ features=features,
+ override_checks=override_checks,
+ classes=self._classes,
+ )
explainer.plot_shap_values(shap_dict=shap_dict)
- def plot_shap_dependence(self, treatment_group, feature_idx, X, tau, model_tau_feature=None, features=None,
- shap_dict=None, interaction_idx='auto', **kwargs):
+ def plot_shap_dependence(
+ self,
+ treatment_group,
+ feature_idx,
+ X,
+ tau,
+ model_tau_feature=None,
+ features=None,
+ shap_dict=None,
+ interaction_idx="auto",
+ **kwargs
+ ):
"""
Plots dependency of shapley values for a specified feature, colored by an interaction feature.
@@ -225,12 +316,20 @@ def plot_shap_dependence(self, treatment_group, feature_idx, X, tau, model_tau_f
the SHAP interaction values).
"""
override_checks = False if shap_dict is None else True
- explainer = Explainer(method='shapley', control_name=self.control_name,
- X=X, tau=tau, model_tau=model_tau_feature,
- features=features, override_checks=override_checks,
- classes=self._classes)
- explainer.plot_shap_dependence(treatment_group=treatment_group,
- feature_idx=feature_idx,
- shap_dict=shap_dict,
- interaction_idx=interaction_idx,
- **kwargs)
+ explainer = Explainer(
+ method="shapley",
+ control_name=self.control_name,
+ X=X,
+ tau=tau,
+ model_tau=model_tau_feature,
+ features=features,
+ override_checks=override_checks,
+ classes=self._classes,
+ )
+ explainer.plot_shap_dependence(
+ treatment_group=treatment_group,
+ feature_idx=feature_idx,
+ shap_dict=shap_dict,
+ interaction_idx=interaction_idx,
+ **kwargs
+ )
diff --git a/causalml/inference/meta/drlearner.py b/causalml/inference/meta/drlearner.py
index 439c2ee4..3ef0e49d 100644
--- a/causalml/inference/meta/drlearner.py
+++ b/causalml/inference/meta/drlearner.py
@@ -203,7 +203,9 @@ def fit(self, X, treatment, y, p=None, seed=None):
)
self.models_tau[group][ifold].fit(X_filt, dr)
- def predict(self, X, treatment=None, y=None, p=None, return_components=False, verbose=True):
+ def predict(
+ self, X, treatment=None, y=None, p=None, return_components=False, verbose=True
+ ):
"""Predict treatment effects.
Args:
@@ -224,8 +226,12 @@ def predict(self, X, treatment=None, y=None, p=None, return_components=False, ve
models_tau = self.models_tau[group]
_te = np.r_[[model.predict(X) for model in models_tau]].mean(axis=0)
te[:, i] = np.ravel(_te)
- yhat_cs[group] = np.r_[[model.predict(X) for model in self.models_mu_c]].mean(axis=0)
- yhat_ts[group] = np.r_[[model.predict(X) for model in self.models_mu_t[group]]].mean(axis=0)
+ yhat_cs[group] = np.r_[
+ [model.predict(X) for model in self.models_mu_c]
+ ].mean(axis=0)
+ yhat_ts[group] = np.r_[
+ [model.predict(X) for model in self.models_mu_t[group]]
+ ].mean(axis=0)
if (y is not None) and (treatment is not None) and verbose:
mask = (treatment == group) | (treatment == self.control_name)
@@ -356,7 +362,9 @@ def estimate_ate(
Returns:
The mean and confidence interval (LB, UB) of the ATE estimate.
"""
- te, yhat_cs, yhat_ts = self.fit_predict(X, treatment, y, p, return_components=True, seed=seed)
+ te, yhat_cs, yhat_ts = self.fit_predict(
+ X, treatment, y, p, return_components=True, seed=seed
+ )
X, treatment, y = convert_pd_to_np(X, treatment, y)
if p is None:
@@ -389,13 +397,14 @@ def estimate_ate(
# SE formula is based on the lower bound formula (7) from Imbens, Guido W., and Jeffrey M. Wooldridge. 2009.
# "Recent Developments in the Econometrics of Program Evaluation." Journal of Economic Literature
- se = np.sqrt((
- (y_filt[w == 0] - yhat_c[w == 0]).var()
- / (1 - prob_treatment) +
- (y_filt[w == 1] - yhat_t[w == 1]).var()
- / prob_treatment +
- (yhat_t - yhat_c).var()
- ) / y_filt.shape[0])
+ se = np.sqrt(
+ (
+ (y_filt[w == 0] - yhat_c[w == 0]).var() / (1 - prob_treatment)
+ + (y_filt[w == 1] - yhat_t[w == 1]).var() / prob_treatment
+ + (yhat_t - yhat_c).var()
+ )
+ / y_filt.shape[0]
+ )
_ate_lb = _ate - se * norm.ppf(1 - self.ate_alpha / 2)
_ate_ub = _ate + se * norm.ppf(1 - self.ate_alpha / 2)
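For reviewers verifying that the re-wrapped expression above is only reformatted: the standard error it computes, following the Imbens and Wooldridge (2009) lower-bound formula cited in the comment, is the one below, where p is the treatment probability, n the filtered sample size, and mu_0 / mu_1 the fitted control / treatment outcome models (`yhat_c`, `yhat_t`).

```latex
SE(\hat{\tau}) = \sqrt{\frac{1}{n}\left(
    \frac{\operatorname{Var}\left(y_i - \hat{\mu}_0(x_i) \mid w_i = 0\right)}{1 - p}
  + \frac{\operatorname{Var}\left(y_i - \hat{\mu}_1(x_i) \mid w_i = 1\right)}{p}
  + \operatorname{Var}\left(\hat{\mu}_1(x_i) - \hat{\mu}_0(x_i)\right)
\right)}
```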
@@ -417,7 +426,9 @@ def estimate_ate(
ate_bootstraps = np.zeros(shape=(self.t_groups.shape[0], n_bootstraps))
for n in tqdm(range(n_bootstraps)):
- cate_b = self.bootstrap(X, treatment, y, p, size=bootstrap_size, seed=seed)
+ cate_b = self.bootstrap(
+ X, treatment, y, p, size=bootstrap_size, seed=seed
+ )
ate_bootstraps[:, n] = cate_b.mean()
ate_lower = np.percentile(
@@ -441,13 +452,15 @@ class BaseDRRegressor(BaseDRLearner):
A parent class for DR-learner regressor classes.
"""
- def __init__(self,
- learner=None,
- control_outcome_learner=None,
- treatment_outcome_learner=None,
- treatment_effect_learner=None,
- ate_alpha=.05,
- control_name=0):
+ def __init__(
+ self,
+ learner=None,
+ control_outcome_learner=None,
+ treatment_outcome_learner=None,
+ treatment_effect_learner=None,
+ ate_alpha=0.05,
+ control_name=0,
+ ):
"""Initialize an DR-learner regressor.
Args:
@@ -466,11 +479,15 @@ def __init__(self,
treatment_outcome_learner=treatment_outcome_learner,
treatment_effect_learner=treatment_effect_learner,
ate_alpha=ate_alpha,
- control_name=control_name)
+ control_name=control_name,
+ )
+
class XGBDRRegressor(BaseDRRegressor):
- def __init__(self, ate_alpha=.05, control_name=0, *args, **kwargs):
+ def __init__(self, ate_alpha=0.05, control_name=0, *args, **kwargs):
"""Initialize a DR-learner with two XGBoost models."""
- super().__init__(learner=XGBRegressor(*args, **kwargs),
- ate_alpha=ate_alpha,
- control_name=control_name)
+ super().__init__(
+ learner=XGBRegressor(*args, **kwargs),
+ ate_alpha=ate_alpha,
+ control_name=control_name,
+ )
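The `dr` target that `models_tau` is fit on earlier in this file is not touched by the reformatting, but for orientation it is the standard doubly-robust (AIPW) pseudo-outcome; with outcome models mu_0, mu_1 and propensity e(x) it has the form below (a summary of the technique, not text from this patch).

```latex
\psi_i = \hat{\mu}_1(x_i) - \hat{\mu}_0(x_i)
       + \frac{w_i \left(y_i - \hat{\mu}_1(x_i)\right)}{\hat{e}(x_i)}
       - \frac{(1 - w_i)\left(y_i - \hat{\mu}_0(x_i)\right)}{1 - \hat{e}(x_i)}
```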
diff --git a/causalml/inference/meta/explainer.py b/causalml/inference/meta/explainer.py
index 19d1ddf8..4490a697 100644
--- a/causalml/inference/meta/explainer.py
+++ b/causalml/inference/meta/explainer.py
@@ -8,13 +8,25 @@
from causalml.inference.meta.utils import convert_pd_to_np
-VALID_METHODS = ('auto', 'permutation', 'shapley')
+VALID_METHODS = ("auto", "permutation", "shapley")
class Explainer(object):
- def __init__(self, method, control_name, X, tau, classes, model_tau=None,
- features=None, normalize=True, test_size=0.3, random_state=None, override_checks=False,
- r_learners=None):
+ def __init__(
+ self,
+ method,
+ control_name,
+ X,
+ tau,
+ classes,
+ model_tau=None,
+ features=None,
+ normalize=True,
+ test_size=0.3,
+ random_state=None,
+ override_checks=False,
+ r_learners=None,
+ ):
"""
The Explainer class handles all feature explanation/interpretation functions, including plotting
feature importances, shapley value distributions, and shapley value dependency plots.
@@ -52,7 +64,9 @@ def __init__(self, method, control_name, X, tau, classes, model_tau=None,
if self.tau is not None and self.tau.ndim == 1:
self.tau = self.tau.reshape(-1, 1)
self.classes = classes
- self.model_tau = LGBMRegressor(importance_type='gain') if model_tau is None else model_tau
+ self.model_tau = (
+ LGBMRegressor(importance_type="gain") if model_tau is None else model_tau
+ )
self.features = features
self.normalize = normalize
self.test_size = test_size
@@ -72,15 +86,21 @@ def check_conditions(self):
- X, tau, and classes are specified
- model_tau has feature_importances_ attribute after fitting
"""
- assert self.method in VALID_METHODS, 'Current supported methods: {}'.format(', '.join(VALID_METHODS))
+ assert self.method in VALID_METHODS, "Current supported methods: {}".format(
+ ", ".join(VALID_METHODS)
+ )
- assert all(obj is not None for obj in (self.X, self.tau, self.classes)), \
- "X, tau, and classes must be provided."
+ assert all(
+ obj is not None for obj in (self.X, self.tau, self.classes)
+ ), "X, tau, and classes must be provided."
model_test = deepcopy(self.model_tau)
- model_test.fit([[0], [1]], [0, 1]) # Fit w/ dummy data to check for feature_importances_ below
- assert hasattr(model_test, "feature_importances_"), \
- "model_tau must have the feature_importances_ method (after fitting)"
+ model_test.fit(
+ [[0], [1]], [0, 1]
+ ) # Fit w/ dummy data to check for feature_importances_ below
+ assert hasattr(
+ model_test, "feature_importances_"
+ ), "model_tau must have the feature_importances_ method (after fitting)"
def create_feature_names(self):
"""
@@ -88,24 +108,28 @@ def create_feature_names(self):
"""
if self.features is None:
num_features = self.X.shape[1]
- self.features = ['Feature_{:03d}'.format(i) for i in range(num_features)]
+ self.features = ["Feature_{:03d}".format(i) for i in range(num_features)]
def build_new_tau_models(self):
"""
Builds tau models (using X to predict estimated/actual tau) for each treatment group.
"""
- if self.method in ('permutation'):
- self.X_train, self.X_test, self.tau_train, self.tau_test = train_test_split(self.X,
- self.tau,
- test_size=self.test_size,
- random_state=self.random_state)
+        if self.method in ("permutation",):
+ self.X_train, self.X_test, self.tau_train, self.tau_test = train_test_split(
+ self.X,
+ self.tau,
+ test_size=self.test_size,
+ random_state=self.random_state,
+ )
else:
self.X_train, self.tau_train = self.X, self.tau
if self.r_learners is not None:
self.models_tau = deepcopy(self.r_learners)
else:
- self.models_tau = {group: deepcopy(self.model_tau) for group in self.classes}
+ self.models_tau = {
+ group: deepcopy(self.model_tau) for group in self.classes
+ }
for group, idx in self.classes.items():
self.models_tau[group].fit(self.X_train, self.tau_train[:, idx])
@@ -113,11 +137,16 @@ def get_importance(self):
"""
Calculates feature importances for each treatment group, based on specified method in __init__.
"""
- importance_catalog = {'auto': self.default_importance, 'permutation': self.perm_importance}
+ importance_catalog = {
+ "auto": self.default_importance,
+ "permutation": self.perm_importance,
+ }
importance_dict = importance_catalog[self.method]()
- importance_dict = {group: pd.Series(array, index=self.features).sort_values(ascending=False)
- for group, array in importance_dict.items()}
+ importance_dict = {
+ group: pd.Series(array, index=self.features).sort_values(ascending=False)
+ for group, array in importance_dict.items()
+ }
return importance_dict
def default_importance(self):
@@ -130,7 +159,9 @@ def default_importance(self):
for group, idx in self.classes.items():
importance_dict[group] = self.models_tau[group].feature_importances_
if self.normalize:
- importance_dict[group] = importance_dict[group] / importance_dict[group].sum()
+ importance_dict[group] = (
+ importance_dict[group] / importance_dict[group].sum()
+ )
return importance_dict
@@ -144,10 +175,12 @@ def perm_importance(self):
self.X_test, self.tau_test = self.X, self.tau
for group, idx in self.classes.items():
perm_estimator = self.models_tau[group]
- importance_dict[group] = permutation_importance(estimator=perm_estimator,
- X=self.X_test,
- y=self.tau_test[:, idx],
- random_state=self.random_state).importances_mean
+ importance_dict[group] = permutation_importance(
+ estimator=perm_estimator,
+ X=self.X_test,
+ y=self.tau_test[:, idx],
+ random_state=self.random_state,
+ ).importances_mean
return importance_dict
@@ -159,13 +192,15 @@ def get_shap_values(self):
for group, mod in self.models_tau.items():
explainer = shap.TreeExplainer(mod)
if self.r_learners is not None:
- explainer.model.original_model.params['objective'] = None # hacky way of running shap without error
+ explainer.model.original_model.params[
+ "objective"
+ ] = None # hacky way of running shap without error
shap_values = explainer.shap_values(self.X)
shap_dict[group] = shap_values
return shap_dict
- def plot_importance(self, importance_dict=None, title_prefix=''):
+ def plot_importance(self, importance_dict=None, title_prefix=""):
"""
Calculates and plots feature importances for each treatment group, based on specified method in __init__.
Skips the calculation part if importance_dict is given.
@@ -174,10 +209,10 @@ def plot_importance(self, importance_dict=None, title_prefix=''):
importance_dict = self.get_importance()
for group, series in importance_dict.items():
plt.figure()
- series.sort_values().plot(kind='barh', figsize=(12, 8))
+ series.sort_values().plot(kind="barh", figsize=(12, 8))
title = group
- if title_prefix != '':
- title = '{} - {}'.format(title_prefix, title)
+ if title_prefix != "":
+ title = "{} - {}".format(title_prefix, title)
plt.title(title)
def plot_shap_values(self, shap_dict=None):
@@ -192,29 +227,42 @@ def plot_shap_values(self, shap_dict=None):
plt.title(group)
shap.summary_plot(values, features=self.X, feature_names=self.features)
- def plot_shap_dependence(self, treatment_group, feature_idx, shap_dict=None, interaction_idx='auto', **kwargs):
+ def plot_shap_dependence(
+ self,
+ treatment_group,
+ feature_idx,
+ shap_dict=None,
+ interaction_idx="auto",
+ **kwargs
+ ):
"""
- Plots dependency of shapley values for a specified feature, colored by an interaction feature.
- Skips the calculation part if shap_dict is given.
+ Plots dependency of shapley values for a specified feature, colored by an interaction feature.
+ Skips the calculation part if shap_dict is given.
- This plots the value of the feature on the x-axis and the SHAP value of the same feature
- on the y-axis. This shows how the model depends on the given feature, and is like a
- richer extension of the classical partial dependence plots. Vertical dispersion of the
- data points represents interaction effects.
+ This plots the value of the feature on the x-axis and the SHAP value of the same feature
+ on the y-axis. This shows how the model depends on the given feature, and is like a
+ richer extension of the classical partial dependence plots. Vertical dispersion of the
+ data points represents interaction effects.
- Args:
- treatment_group (str or int): name of treatment group to create dependency plot on
- feature_idx (str or int): feature index / name to create dependency plot on
- shap_dict (optional, dict): a dict of shapley value matrices. If None, shap_dict will be computed.
- interaction_idx (optional, str or int): feature index / name used in coloring scheme as interaction feature.
- If "auto" then shap.common.approximate_interactions is used to pick what seems to be the
- strongest interaction (note that to find to true strongest interaction you need to compute
- the SHAP interaction values).
+ Args:
+ treatment_group (str or int): name of treatment group to create dependency plot on
+ feature_idx (str or int): feature index / name to create dependency plot on
+ shap_dict (optional, dict): a dict of shapley value matrices. If None, shap_dict will be computed.
+ interaction_idx (optional, str or int): feature index / name used in coloring scheme as interaction feature.
+ If "auto" then shap.common.approximate_interactions is used to pick what seems to be the
+                strongest interaction (note that to find the true strongest interaction you need to compute
+ the SHAP interaction values).
"""
if shap_dict is None:
shap_dict = self.get_shap_values()
shap_values = shap_dict[treatment_group]
- shap.dependence_plot(feature_idx, shap_values, self.X, interaction_index=interaction_idx,
- feature_names=self.features, **kwargs)
+ shap.dependence_plot(
+ feature_idx,
+ shap_values,
+ self.X,
+ interaction_index=interaction_idx,
+ feature_names=self.features,
+ **kwargs
+ )
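The `Explainer` reformatted above maps each treatment group to a fitted `model_tau` and reads its `feature_importances_` (optionally normalized), which is what `get_importance` and `default_importance` return. A standalone sketch of that per-group loop, using scikit-learn's `RandomForestRegressor` as a stand-in for the default `LGBMRegressor`, with invented data and group names:

```python
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor

rng = np.random.default_rng(0)
X = rng.normal(size=(500, 4))
tau = np.column_stack([0.5 * X[:, 0], -0.3 * X[:, 1]])   # fake CATE column per group
classes = {"treatment_a": 0, "treatment_b": 1}           # group name -> tau column index
features = ["Feature_{:03d}".format(i) for i in range(X.shape[1])]

importance_dict = {}
for group, idx in classes.items():
    model = RandomForestRegressor(n_estimators=100, random_state=0)
    model.fit(X, tau[:, idx])                 # model_tau: predict tau from X
    imp = model.feature_importances_
    importance_dict[group] = pd.Series(imp / imp.sum(), index=features).sort_values(
        ascending=False
    )
```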
diff --git a/causalml/inference/meta/rlearner.py b/causalml/inference/meta/rlearner.py
index 10b54a88..2fea90eb 100644
--- a/causalml/inference/meta/rlearner.py
+++ b/causalml/inference/meta/rlearner.py
@@ -8,13 +8,17 @@
from xgboost import XGBRegressor
from causalml.inference.meta.base import BaseLearner
-from causalml.inference.meta.utils import (check_treatment_vector,
- get_xgboost_objective_metric, convert_pd_to_np, get_weighted_variance)
+from causalml.inference.meta.utils import (
+ check_treatment_vector,
+ get_xgboost_objective_metric,
+ convert_pd_to_np,
+ get_weighted_variance,
+)
from causalml.inference.meta.explainer import Explainer
from causalml.propensity import compute_propensity_score, ElasticNetPropensityModel
-logger = logging.getLogger('causalml')
+logger = logging.getLogger("causalml")
class BaseRLearner(BaseLearner):
@@ -25,15 +29,17 @@ class BaseRLearner(BaseLearner):
Details of R-learner are available at Nie and Wager (2019) (https://arxiv.org/abs/1712.04912).
"""
- def __init__(self,
- learner=None,
- outcome_learner=None,
- effect_learner=None,
- propensity_learner=ElasticNetPropensityModel(),
- ate_alpha=.05,
- control_name=0,
- n_fold=5,
- random_state=None):
+ def __init__(
+ self,
+ learner=None,
+ outcome_learner=None,
+ effect_learner=None,
+ propensity_learner=ElasticNetPropensityModel(),
+ ate_alpha=0.05,
+ control_name=0,
+ n_fold=5,
+ random_state=None,
+ ):
"""Initialize an R-learner.
Args:
@@ -48,11 +54,17 @@ def __init__(self,
n_fold (int, optional): the number of cross validation folds for outcome_learner
random_state (int or RandomState, optional): a seed (int) or random number generator (RandomState)
"""
- assert (learner is not None) or ((outcome_learner is not None) and (effect_learner is not None))
+ assert (learner is not None) or (
+ (outcome_learner is not None) and (effect_learner is not None)
+ )
assert propensity_learner is not None
- self.model_mu = outcome_learner if outcome_learner is not None else deepcopy(learner)
- self.model_tau = effect_learner if effect_learner is not None else deepcopy(learner)
+ self.model_mu = (
+ outcome_learner if outcome_learner is not None else deepcopy(learner)
+ )
+ self.model_tau = (
+ effect_learner if effect_learner is not None else deepcopy(learner)
+ )
self.model_p = propensity_learner
self.ate_alpha = ate_alpha
@@ -65,10 +77,12 @@ def __init__(self,
self.propensity_model = None
def __repr__(self):
- return (f'{self.__class__.__name__}\n'
- f'\toutcome_learner={self.model_mu.__repr__()}\n'
- f'\teffect_learner={self.model_tau.__repr__()}\n'
- f'\tpropensity_learner={self.model_p.__repr__()}')
+ return (
+ f"{self.__class__.__name__}\n"
+ f"\toutcome_learner={self.model_mu.__repr__()}\n"
+ f"\teffect_learner={self.model_tau.__repr__()}\n"
+ f"\tpropensity_learner={self.model_p.__repr__()}"
+ )
def fit(self, X, treatment, y, p=None, sample_weight=None, verbose=True):
"""Fit the treatment effect and outcome models of the R learner.
@@ -87,7 +101,9 @@ def fit(self, X, treatment, y, p=None, sample_weight=None, verbose=True):
X, treatment, y = convert_pd_to_np(X, treatment, y)
check_treatment_vector(treatment, self.control_name)
if sample_weight is not None:
- assert len(sample_weight) == len(y), "Data length must be equal for sample_weight and the input data"
+ assert len(sample_weight) == len(
+ y
+ ), "Data length must be equal for sample_weight and the input data"
sample_weight = convert_pd_to_np(sample_weight)
self.t_groups = np.unique(treatment[treatment != self.control_name])
self.t_groups.sort()
@@ -104,7 +120,7 @@ def fit(self, X, treatment, y, p=None, sample_weight=None, verbose=True):
self.vars_t = {}
if verbose:
- logger.info('generating out-of-fold CV outcome estimates')
+ logger.info("generating out-of-fold CV outcome estimates")
yhat = cross_val_predict(self.model_mu, X, y, cv=self.cv, n_jobs=-1)
for group in self.t_groups:
@@ -131,9 +147,14 @@ def fit(self, X, treatment, y, p=None, sample_weight=None, verbose=True):
self.vars_t[group] = diff_t.var()
if verbose:
- logger.info('training the treatment effect model for {} with R-loss'.format(group))
- self.models_tau[group].fit(X_filt, (y_filt - yhat_filt) / (w - p_filt),
- sample_weight=weight)
+ logger.info(
+ "training the treatment effect model for {} with R-loss".format(
+ group
+ )
+ )
+ self.models_tau[group].fit(
+ X_filt, (y_filt - yhat_filt) / (w - p_filt), sample_weight=weight
+ )
def predict(self, X, p=None):
"""Predict treatment effects.
@@ -152,8 +173,18 @@ def predict(self, X, p=None):
return te
- def fit_predict(self, X, treatment, y, p=None, sample_weight=None, return_ci=False,
- n_bootstraps=1000, bootstrap_size=10000, verbose=True):
+ def fit_predict(
+ self,
+ X,
+ treatment,
+ y,
+ p=None,
+ sample_weight=None,
+ return_ci=False,
+ n_bootstraps=1000,
+ bootstrap_size=10000,
+ verbose=True,
+ ):
"""Fit the treatment effect and outcome models of the R learner and predict treatment effects.
Args:
@@ -185,9 +216,11 @@ def fit_predict(self, X, treatment, y, p=None, sample_weight=None, return_ci=Fal
_classes_global = self._classes
model_mu_global = deepcopy(self.model_mu)
models_tau_global = deepcopy(self.models_tau)
- te_bootstraps = np.zeros(shape=(X.shape[0], self.t_groups.shape[0], n_bootstraps))
+ te_bootstraps = np.zeros(
+ shape=(X.shape[0], self.t_groups.shape[0], n_bootstraps)
+ )
- logger.info('Bootstrap Confidence Intervals')
+ logger.info("Bootstrap Confidence Intervals")
for i in tqdm(range(n_bootstraps)):
if p is None:
p = self.propensity
@@ -197,7 +230,9 @@ def fit_predict(self, X, treatment, y, p=None, sample_weight=None, return_ci=Fal
te_bootstraps[:, :, i] = te_b
te_lower = np.percentile(te_bootstraps, (self.ate_alpha / 2) * 100, axis=2)
- te_upper = np.percentile(te_bootstraps, (1 - self.ate_alpha / 2) * 100, axis=2)
+ te_upper = np.percentile(
+ te_bootstraps, (1 - self.ate_alpha / 2) * 100, axis=2
+ )
# set member variables back to global (currently last bootstrapped outcome)
self.t_groups = t_groups_global
@@ -207,8 +242,17 @@ def fit_predict(self, X, treatment, y, p=None, sample_weight=None, return_ci=Fal
return (te, te_lower, te_upper)
- def estimate_ate(self, X, treatment, y, p=None, sample_weight=None, bootstrap_ci=False,
- n_bootstraps=1000, bootstrap_size=10000):
+ def estimate_ate(
+ self,
+ X,
+ treatment,
+ y,
+ p=None,
+ sample_weight=None,
+ bootstrap_ci=False,
+ n_bootstraps=1000,
+ bootstrap_size=10000,
+ ):
"""Estimate the Average Treatment Effect (ATE).
Args:
@@ -238,10 +282,14 @@ def estimate_ate(self, X, treatment, y, p=None, sample_weight=None, bootstrap_ci
prob_treatment = float(sum(w)) / X.shape[0]
_ate = te[:, i].mean()
- se = (np.sqrt((self.vars_t[group] / prob_treatment)
- + (self.vars_c[group] / (1 - prob_treatment))
- + te[:, i].var())
- / X.shape[0])
+ se = (
+ np.sqrt(
+ (self.vars_t[group] / prob_treatment)
+ + (self.vars_c[group] / (1 - prob_treatment))
+ + te[:, i].var()
+ )
+ / X.shape[0]
+ )
_ate_lb = _ate - se * norm.ppf(1 - self.ate_alpha / 2)
_ate_ub = _ate + se * norm.ppf(1 - self.ate_alpha / 2)
@@ -258,7 +306,7 @@ def estimate_ate(self, X, treatment, y, p=None, sample_weight=None, bootstrap_ci
model_mu_global = deepcopy(self.model_mu)
models_tau_global = deepcopy(self.models_tau)
- logger.info('Bootstrap Confidence Intervals for ATE')
+ logger.info("Bootstrap Confidence Intervals for ATE")
ate_bootstraps = np.zeros(shape=(self.t_groups.shape[0], n_bootstraps))
for n in tqdm(range(n_bootstraps)):
@@ -269,8 +317,12 @@ def estimate_ate(self, X, treatment, y, p=None, sample_weight=None, bootstrap_ci
cate_b = self.bootstrap(X, treatment, y, p, size=bootstrap_size)
ate_bootstraps[:, n] = cate_b.mean()
- ate_lower = np.percentile(ate_bootstraps, (self.ate_alpha / 2) * 100, axis=1)
- ate_upper = np.percentile(ate_bootstraps, (1 - self.ate_alpha / 2) * 100, axis=1)
+ ate_lower = np.percentile(
+ ate_bootstraps, (self.ate_alpha / 2) * 100, axis=1
+ )
+ ate_upper = np.percentile(
+ ate_bootstraps, (1 - self.ate_alpha / 2) * 100, axis=1
+ )
# set member variables back to global (currently last bootstrapped outcome)
self.t_groups = t_groups_global
@@ -285,15 +337,17 @@ class BaseRRegressor(BaseRLearner):
A parent class for R-learner regressor classes.
"""
- def __init__(self,
- learner=None,
- outcome_learner=None,
- effect_learner=None,
- propensity_learner=ElasticNetPropensityModel(),
- ate_alpha=.05,
- control_name=0,
- n_fold=5,
- random_state=None):
+ def __init__(
+ self,
+ learner=None,
+ outcome_learner=None,
+ effect_learner=None,
+ propensity_learner=ElasticNetPropensityModel(),
+ ate_alpha=0.05,
+ control_name=0,
+ n_fold=5,
+ random_state=None,
+ ):
"""Initialize an R-learner regressor.
Args:
@@ -316,7 +370,8 @@ def __init__(self,
ate_alpha=ate_alpha,
control_name=control_name,
n_fold=n_fold,
- random_state=random_state)
+ random_state=random_state,
+ )
class BaseRClassifier(BaseRLearner):
@@ -324,14 +379,16 @@ class BaseRClassifier(BaseRLearner):
A parent class for R-learner classifier classes.
"""
- def __init__(self,
- outcome_learner=None,
- effect_learner=None,
- propensity_learner=ElasticNetPropensityModel(),
- ate_alpha=.05,
- control_name=0,
- n_fold=5,
- random_state=None):
+ def __init__(
+ self,
+ outcome_learner=None,
+ effect_learner=None,
+ propensity_learner=ElasticNetPropensityModel(),
+ ate_alpha=0.05,
+ control_name=0,
+ n_fold=5,
+ random_state=None,
+ ):
"""Initialize an R-learner classifier.
Args:
@@ -353,10 +410,13 @@ def __init__(self,
ate_alpha=ate_alpha,
control_name=control_name,
n_fold=n_fold,
- random_state=random_state)
+ random_state=random_state,
+ )
if (outcome_learner is None) and (effect_learner is None):
- raise ValueError("Either the outcome learner or the effect learner must be specified.")
+ raise ValueError(
+ "Either the outcome learner or the effect learner must be specified."
+ )
def fit(self, X, treatment, y, p=None, sample_weight=None, verbose=True):
"""Fit the treatment effect and outcome models of the R learner.
@@ -375,7 +435,9 @@ def fit(self, X, treatment, y, p=None, sample_weight=None, verbose=True):
X, treatment, y = convert_pd_to_np(X, treatment, y)
check_treatment_vector(treatment, self.control_name)
if sample_weight is not None:
- assert len(sample_weight) == len(y), "Data length must be equal for sample_weight and the input data"
+ assert len(sample_weight) == len(
+ y
+ ), "Data length must be equal for sample_weight and the input data"
sample_weight = convert_pd_to_np(sample_weight)
self.t_groups = np.unique(treatment[treatment != self.control_name])
self.t_groups.sort()
@@ -392,8 +454,10 @@ def fit(self, X, treatment, y, p=None, sample_weight=None, verbose=True):
self.vars_t = {}
if verbose:
- logger.info('generating out-of-fold CV outcome estimates')
- yhat = cross_val_predict(self.model_mu, X, y, cv=self.cv, method='predict_proba', n_jobs=-1)[:, 1]
+ logger.info("generating out-of-fold CV outcome estimates")
+ yhat = cross_val_predict(
+ self.model_mu, X, y, cv=self.cv, method="predict_proba", n_jobs=-1
+ )[:, 1]
for group in self.t_groups:
mask = (treatment == group) | (treatment == self.control_name)
@@ -419,9 +483,14 @@ def fit(self, X, treatment, y, p=None, sample_weight=None, verbose=True):
self.vars_t[group] = diff_t.var()
if verbose:
- logger.info('training the treatment effect model for {} with R-loss'.format(group))
- self.models_tau[group].fit(X_filt, (y_filt - yhat_filt) / (w - p_filt),
- sample_weight=weight)
+ logger.info(
+ "training the treatment effect model for {} with R-loss".format(
+ group
+ )
+ )
+ self.models_tau[group].fit(
+ X_filt, (y_filt - yhat_filt) / (w - p_filt), sample_weight=weight
+ )
def predict(self, X, p=None):
"""Predict treatment effects.
@@ -442,15 +511,17 @@ def predict(self, X, p=None):
class XGBRRegressor(BaseRRegressor):
- def __init__(self,
- early_stopping=True,
- test_size=0.3,
- early_stopping_rounds=30,
- effect_learner_objective='rank:pairwise',
- effect_learner_n_estimators=500,
- random_state=42,
- *args,
- **kwargs):
+ def __init__(
+ self,
+ early_stopping=True,
+ test_size=0.3,
+ early_stopping_rounds=30,
+ effect_learner_objective="rank:pairwise",
+ effect_learner_n_estimators=500,
+ random_state=42,
+ *args,
+ **kwargs,
+ ):
"""Initialize an R-learner regressor with XGBoost model using pairwise ranking objective.
Args:
@@ -464,7 +535,7 @@ def __init__(self,
effect_learner_n_estimators (int, optional): number of trees to fit for the effect learner (default = 500)
"""
- assert isinstance(random_state, int), 'random_state should be int.'
+ assert isinstance(random_state, int), "random_state should be int."
objective, metric = get_xgboost_objective_metric(effect_learner_objective)
self.effect_learner_objective = objective
@@ -477,11 +548,13 @@ def __init__(self,
super().__init__(
outcome_learner=XGBRegressor(random_state=random_state, *args, **kwargs),
- effect_learner=XGBRegressor(objective=self.effect_learner_objective,
- n_estimators=self.effect_learner_n_estimators,
- random_state=random_state,
- *args,
- **kwargs)
+ effect_learner=XGBRegressor(
+ objective=self.effect_learner_objective,
+ n_estimators=self.effect_learner_n_estimators,
+ random_state=random_state,
+ *args,
+ **kwargs,
+ ),
)
def fit(self, X, treatment, y, p=None, sample_weight=None, verbose=True):
@@ -500,8 +573,14 @@ def fit(self, X, treatment, y, p=None, sample_weight=None, verbose=True):
X, treatment, y = convert_pd_to_np(X, treatment, y)
check_treatment_vector(treatment, self.control_name)
# initialize equal sample weight if it's not provided, for simplicity purpose
- sample_weight = convert_pd_to_np(sample_weight) if sample_weight is not None else convert_pd_to_np(np.ones(len(y)))
- assert len(sample_weight) == len(y), "Data length must be equal for sample_weight and the input data"
+ sample_weight = (
+ convert_pd_to_np(sample_weight)
+ if sample_weight is not None
+ else convert_pd_to_np(np.ones(len(y)))
+ )
+ assert len(sample_weight) == len(
+ y
+ ), "Data length must be equal for sample_weight and the input data"
self.t_groups = np.unique(treatment[treatment != self.control_name])
self.t_groups.sort()
@@ -517,7 +596,7 @@ def fit(self, X, treatment, y, p=None, sample_weight=None, verbose=True):
self.vars_t = {}
if verbose:
- logger.info('generating out-of-fold CV outcome estimates')
+ logger.info("generating out-of-fold CV outcome estimates")
yhat = cross_val_predict(self.model_mu, X, y, cv=self.cv, n_jobs=-1)
for group in self.t_groups:
@@ -532,31 +611,64 @@ def fit(self, X, treatment, y, p=None, sample_weight=None, verbose=True):
sample_weight_filt = sample_weight[treatment_mask]
if verbose:
- logger.info('training the treatment effect model for {} with R-loss'.format(group))
+ logger.info(
+ "training the treatment effect model for {} with R-loss".format(
+ group
+ )
+ )
if self.early_stopping:
- X_train_filt, X_test_filt, y_train_filt, y_test_filt, yhat_train_filt, yhat_test_filt, \
- w_train, w_test, p_train_filt, p_test_filt, sample_weight_train_filt, sample_weight_test_filt \
- = train_test_split(
- X_filt, y_filt, yhat_filt, w, p_filt, sample_weight_filt,
- test_size=self.test_size, random_state=self.random_state
- )
+ (
+ X_train_filt,
+ X_test_filt,
+ y_train_filt,
+ y_test_filt,
+ yhat_train_filt,
+ yhat_test_filt,
+ w_train,
+ w_test,
+ p_train_filt,
+ p_test_filt,
+ sample_weight_train_filt,
+ sample_weight_test_filt,
+ ) = train_test_split(
+ X_filt,
+ y_filt,
+ yhat_filt,
+ w,
+ p_filt,
+ sample_weight_filt,
+ test_size=self.test_size,
+ random_state=self.random_state,
+ )
weight = sample_weight_filt
- self.models_tau[group].fit(X=X_train_filt,
- y=(y_train_filt - yhat_train_filt) / (w_train - p_train_filt),
- sample_weight=sample_weight_train_filt * ((w_train - p_train_filt) ** 2),
- eval_set=[(X_test_filt,
- (y_test_filt - yhat_test_filt) / (w_test - p_test_filt))],
- sample_weight_eval_set=[sample_weight_test_filt * ((w_test - p_test_filt) ** 2)],
- eval_metric=self.effect_learner_eval_metric,
- early_stopping_rounds=self.early_stopping_rounds,
- verbose=verbose)
+ self.models_tau[group].fit(
+ X=X_train_filt,
+ y=(y_train_filt - yhat_train_filt) / (w_train - p_train_filt),
+ sample_weight=sample_weight_train_filt
+ * ((w_train - p_train_filt) ** 2),
+ eval_set=[
+ (
+ X_test_filt,
+ (y_test_filt - yhat_test_filt) / (w_test - p_test_filt),
+ )
+ ],
+ sample_weight_eval_set=[
+ sample_weight_test_filt * ((w_test - p_test_filt) ** 2)
+ ],
+ eval_metric=self.effect_learner_eval_metric,
+ early_stopping_rounds=self.early_stopping_rounds,
+ verbose=verbose,
+ )
else:
- self.models_tau[group].fit(X_filt, (y_filt - yhat_filt) / (w - p_filt),
- sample_weight=sample_weight_filt * ((w - p_filt) ** 2),
- eval_metric=self.effect_learner_eval_metric)
+ self.models_tau[group].fit(
+ X_filt,
+ (y_filt - yhat_filt) / (w - p_filt),
+ sample_weight=sample_weight_filt * ((w - p_filt) ** 2),
+ eval_metric=self.effect_learner_eval_metric,
+ )
diff_c = y_filt[w == 0] - yhat_filt[w == 0]
diff_t = y_filt[w == 1] - yhat_filt[w == 1]
@@ -564,4 +676,3 @@ def fit(self, X, treatment, y, p=None, sample_weight=None, verbose=True):
sample_weight_filt_t = sample_weight_filt[w == 1]
self.vars_c[group] = get_weighted_variance(diff_c, sample_weight_filt_c)
self.vars_t[group] = get_weighted_variance(diff_t, sample_weight_filt_t)
-
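The R-loss fits above, a pseudo-outcome `(y - yhat) / (w - p)` with sample weight proportional to `(w - p) ** 2`, are the weighted-regression form of the R-learner objective from Nie and Wager (2019); per treatment group the effect model solves

```latex
\hat{\tau} = \arg\min_{\tau} \sum_i \left(w_i - \hat{e}(x_i)\right)^2
             \left[\frac{y_i - \hat{m}(x_i)}{w_i - \hat{e}(x_i)} - \tau(x_i)\right]^2
           = \arg\min_{\tau} \sum_i \left[\left(y_i - \hat{m}(x_i)\right)
             - \left(w_i - \hat{e}(x_i)\right)\tau(x_i)\right]^2
```

where m-hat is the out-of-fold outcome estimate (`yhat`) and e-hat the propensity score (`p`).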
diff --git a/causalml/inference/meta/slearner.py b/causalml/inference/meta/slearner.py
index 28ce12b4..52470844 100644
--- a/causalml/inference/meta/slearner.py
+++ b/causalml/inference/meta/slearner.py
@@ -12,13 +12,13 @@
from causalml.metrics import regression_metrics, classification_metrics
-logger = logging.getLogger('causalml')
+logger = logging.getLogger("causalml")
class StatsmodelsOLS:
"""A sklearn style wrapper class for statsmodels' OLS."""
- def __init__(self, cov_type='HC1', alpha=.05):
+ def __init__(self, cov_type="HC1", alpha=0.05):
"""Initialize a statsmodels' OLS wrapper class object.
Args:
cov_type (str, optional): covariance estimator type.
@@ -34,14 +34,14 @@ def fit(self, X, y):
y (np.array): a label vector
"""
# Append ones. The first column is for the treatment indicator.
- X = sm.add_constant(X, prepend=False, has_constant='add')
+ X = sm.add_constant(X, prepend=False, has_constant="add")
self.model = sm.OLS(y, X).fit(cov_type=self.cov_type)
self.coefficients = self.model.params
self.conf_ints = self.model.conf_int(alpha=self.alpha)
def predict(self, X):
# Append ones. The first column is for the treatment indicator.
- X = sm.add_constant(X, prepend=False, has_constant='add')
+ X = sm.add_constant(X, prepend=False, has_constant="add")
return self.model.predict(X)
@@ -65,8 +65,7 @@ def __init__(self, learner=None, ate_alpha=0.05, control_name=0):
self.control_name = control_name
def __repr__(self):
- return '{}(model={})'.format(self.__class__.__name__,
- self.model.__repr__())
+ return "{}(model={})".format(self.__class__.__name__, self.model.__repr__())
def fit(self, X, treatment, y, p=None):
"""Fit the inference model
@@ -92,7 +91,9 @@ def fit(self, X, treatment, y, p=None):
X_new = np.hstack((w.reshape((-1, 1)), X_filt))
self.models[group].fit(X_new, y_filt)
- def predict(self, X, treatment=None, y=None, p=None, return_components=False, verbose=True):
+ def predict(
+ self, X, treatment=None, y=None, p=None, return_components=False, verbose=True
+ ):
"""Predict treatment effects.
Args:
X (np.matrix or np.array or pd.Dataframe): a feature matrix
@@ -128,7 +129,7 @@ def predict(self, X, treatment=None, y=None, p=None, return_components=False, ve
yhat[w == 0] = yhat_cs[group][mask][w == 0]
yhat[w == 1] = yhat_ts[group][mask][w == 1]
- logger.info('Error metrics for group {}'.format(group))
+ logger.info("Error metrics for group {}".format(group))
regression_metrics(y_filt, yhat, w)
te = np.zeros((X.shape[0], self.t_groups.shape[0]))
@@ -140,8 +141,18 @@ def predict(self, X, treatment=None, y=None, p=None, return_components=False, ve
else:
return te, yhat_cs, yhat_ts
- def fit_predict(self, X, treatment, y, p=None, return_ci=False, n_bootstraps=1000, bootstrap_size=10000,
- return_components=False, verbose=True):
+ def fit_predict(
+ self,
+ X,
+ treatment,
+ y,
+ p=None,
+ return_ci=False,
+ n_bootstraps=1000,
+ bootstrap_size=10000,
+ return_components=False,
+ verbose=True,
+ ):
"""Fit the inference model of the S learner and predict treatment effects.
Args:
X (np.matrix, np.array, or pd.Dataframe): a feature matrix
@@ -166,15 +177,19 @@ def fit_predict(self, X, treatment, y, p=None, return_ci=False, n_bootstraps=100
t_groups_global = self.t_groups
_classes_global = self._classes
models_global = deepcopy(self.models)
- te_bootstraps = np.zeros(shape=(X.shape[0], self.t_groups.shape[0], n_bootstraps))
+ te_bootstraps = np.zeros(
+ shape=(X.shape[0], self.t_groups.shape[0], n_bootstraps)
+ )
- logger.info('Bootstrap Confidence Intervals')
+ logger.info("Bootstrap Confidence Intervals")
for i in tqdm(range(n_bootstraps)):
te_b = self.bootstrap(X, treatment, y, size=bootstrap_size)
te_bootstraps[:, :, i] = te_b
- te_lower = np.percentile(te_bootstraps, (self.ate_alpha/2)*100, axis=2)
- te_upper = np.percentile(te_bootstraps, (1 - self.ate_alpha / 2) * 100, axis=2)
+ te_lower = np.percentile(te_bootstraps, (self.ate_alpha / 2) * 100, axis=2)
+ te_upper = np.percentile(
+ te_bootstraps, (1 - self.ate_alpha / 2) * 100, axis=2
+ )
# set member variables back to global (currently last bootstrapped outcome)
self.t_groups = t_groups_global
@@ -183,8 +198,17 @@ def fit_predict(self, X, treatment, y, p=None, return_ci=False, n_bootstraps=100
return (te, te_lower, te_upper)
- def estimate_ate(self, X, treatment, y, p=None, return_ci=False, bootstrap_ci=False,
- n_bootstraps=1000, bootstrap_size=10000):
+ def estimate_ate(
+ self,
+ X,
+ treatment,
+ y,
+ p=None,
+ return_ci=False,
+ bootstrap_ci=False,
+ n_bootstraps=1000,
+ bootstrap_size=10000,
+ ):
"""Estimate the Average Treatment Effect (ATE).
Args:
@@ -217,13 +241,14 @@ def estimate_ate(self, X, treatment, y, p=None, return_ci=False, bootstrap_ci=Fa
yhat_c = yhat_cs[group][mask]
yhat_t = yhat_ts[group][mask]
- se = np.sqrt((
- (y_filt[w == 0] - yhat_c[w == 0]).var()
- / (1 - prob_treatment) +
- (y_filt[w == 1] - yhat_t[w == 1]).var()
- / prob_treatment +
- (yhat_t - yhat_c).var()
- ) / y_filt.shape[0])
+ se = np.sqrt(
+ (
+ (y_filt[w == 0] - yhat_c[w == 0]).var() / (1 - prob_treatment)
+ + (y_filt[w == 1] - yhat_t[w == 1]).var() / prob_treatment
+ + (yhat_t - yhat_c).var()
+ )
+ / y_filt.shape[0]
+ )
_ate_lb = _ate - se * norm.ppf(1 - self.ate_alpha / 2)
_ate_ub = _ate + se * norm.ppf(1 - self.ate_alpha / 2)
@@ -241,15 +266,19 @@ def estimate_ate(self, X, treatment, y, p=None, return_ci=False, bootstrap_ci=Fa
_classes_global = self._classes
models_global = deepcopy(self.models)
- logger.info('Bootstrap Confidence Intervals for ATE')
+ logger.info("Bootstrap Confidence Intervals for ATE")
ate_bootstraps = np.zeros(shape=(self.t_groups.shape[0], n_bootstraps))
for n in tqdm(range(n_bootstraps)):
ate_b = self.bootstrap(X, treatment, y, size=bootstrap_size)
ate_bootstraps[:, n] = ate_b.mean()
- ate_lower = np.percentile(ate_bootstraps, (self.ate_alpha / 2) * 100, axis=1)
- ate_upper = np.percentile(ate_bootstraps, (1 - self.ate_alpha / 2) * 100, axis=1)
+ ate_lower = np.percentile(
+ ate_bootstraps, (self.ate_alpha / 2) * 100, axis=1
+ )
+ ate_upper = np.percentile(
+ ate_bootstraps, (1 - self.ate_alpha / 2) * 100, axis=1
+ )
# set member variables back to global (currently last bootstrapped outcome)
self.t_groups = t_groups_global
@@ -271,9 +300,8 @@ def __init__(self, learner=None, ate_alpha=0.05, control_name=0):
control_name (str or int, optional): name of control group
"""
super().__init__(
- learner=learner,
- ate_alpha=ate_alpha,
- control_name=control_name)
+ learner=learner, ate_alpha=ate_alpha, control_name=control_name
+ )
class BaseSClassifier(BaseSLearner):
@@ -289,11 +317,12 @@ def __init__(self, learner=None, ate_alpha=0.05, control_name=0):
control_name (str or int, optional): name of control group
"""
super().__init__(
- learner=learner,
- ate_alpha=ate_alpha,
- control_name=control_name)
+ learner=learner, ate_alpha=ate_alpha, control_name=control_name
+ )
- def predict(self, X, treatment=None, y=None, p=None, return_components=False, verbose=True):
+ def predict(
+ self, X, treatment=None, y=None, p=None, return_components=False, verbose=True
+ ):
"""Predict treatment effects.
Args:
X (np.matrix or np.array or pd.Dataframe): a feature matrix
@@ -329,7 +358,7 @@ def predict(self, X, treatment=None, y=None, p=None, return_components=False, ve
yhat[w == 0] = yhat_cs[group][mask][w == 0]
yhat[w == 1] = yhat_ts[group][mask][w == 1]
- logger.info('Error metrics for group {}'.format(group))
+ logger.info("Error metrics for group {}".format(group))
classification_metrics(y_filt, yhat, w)
te = np.zeros((X.shape[0], self.t_groups.shape[0]))
@@ -343,7 +372,7 @@ def predict(self, X, treatment=None, y=None, p=None, return_components=False, ve
class LRSRegressor(BaseSRegressor):
- def __init__(self, ate_alpha=.05, control_name=0):
+ def __init__(self, ate_alpha=0.05, control_name=0):
"""Initialize an S-learner with a linear regression model.
Args:
ate_alpha (float, optional): the confidence level alpha of the ATE estimate
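For context on the `BaseSLearner` hunks above: the S-learner stacks the treatment indicator as the first column of the design matrix, fits a single model, and takes the difference between the `w = 1` and `w = 0` predictions as the CATE. A minimal sketch of that idea (illustrative only; `GradientBoostingRegressor` stands in for the user-supplied learner and the data are simulated):

```python
import numpy as np
from sklearn.ensemble import GradientBoostingRegressor

rng = np.random.default_rng(1)
n = 2_000
X = rng.normal(size=(n, 5))
w = rng.binomial(1, 0.5, size=n)                          # randomized treatment
y = X[:, 0] + w * (0.8 + 0.4 * X[:, 1]) + rng.normal(scale=0.5, size=n)

# One model over [w, X], as in X_new = np.hstack((w.reshape((-1, 1)), X_filt)) above
model = GradientBoostingRegressor()
model.fit(np.hstack([w.reshape(-1, 1), X]), y)

X_treated = np.hstack([np.ones((n, 1)), X])    # everyone treated
X_control = np.hstack([np.zeros((n, 1)), X])   # everyone in control
cate = model.predict(X_treated) - model.predict(X_control)
ate = cate.mean()                              # roughly the 0.8 baseline effect here
```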
diff --git a/causalml/inference/meta/tlearner.py b/causalml/inference/meta/tlearner.py
index 9f414885..c6cc2bc0 100644
--- a/causalml/inference/meta/tlearner.py
+++ b/causalml/inference/meta/tlearner.py
@@ -6,7 +6,8 @@
import sklearn
from sklearn.exceptions import ConvergenceWarning
from sklearn.neural_network import MLPRegressor
-if version.parse(sklearn.__version__) >= version.parse('0.22.0'):
+
+if version.parse(sklearn.__version__) >= version.parse("0.22.0"):
from sklearn.utils._testing import ignore_warnings
else:
from sklearn.utils.testing import ignore_warnings
@@ -19,7 +20,7 @@
from causalml.metrics import regression_metrics, classification_metrics
-logger = logging.getLogger('causalml')
+logger = logging.getLogger("causalml")
class BaseTLearner(BaseLearner):
@@ -30,7 +31,14 @@ class BaseTLearner(BaseLearner):
Details of T-learner are available at Kunzel et al. (2018) (https://arxiv.org/abs/1706.03461).
"""
- def __init__(self, learner=None, control_learner=None, treatment_learner=None, ate_alpha=.05, control_name=0):
+ def __init__(
+ self,
+ learner=None,
+ control_learner=None,
+ treatment_learner=None,
+ ate_alpha=0.05,
+ control_name=0,
+ ):
"""Initialize a T-learner.
Args:
@@ -40,7 +48,9 @@ def __init__(self, learner=None, control_learner=None, treatment_learner=None, a
ate_alpha (float, optional): the confidence level alpha of the ATE estimate
control_name (str or int, optional): name of control group
"""
- assert (learner is not None) or ((control_learner is not None) and (treatment_learner is not None))
+ assert (learner is not None) or (
+ (control_learner is not None) and (treatment_learner is not None)
+ )
if control_learner is None:
self.model_c = deepcopy(learner)
@@ -56,9 +66,9 @@ def __init__(self, learner=None, control_learner=None, treatment_learner=None, a
self.control_name = control_name
def __repr__(self):
- return '{}(model_c={}, model_t={})'.format(self.__class__.__name__,
- self.model_c.__repr__(),
- self.model_t.__repr__())
+ return "{}(model_c={}, model_t={})".format(
+ self.__class__.__name__, self.model_c.__repr__(), self.model_t.__repr__()
+ )
@ignore_warnings(category=ConvergenceWarning)
def fit(self, X, treatment, y, p=None):
@@ -87,7 +97,9 @@ def fit(self, X, treatment, y, p=None):
self.models_c[group].fit(X_filt[w == 0], y_filt[w == 0])
self.models_t[group].fit(X_filt[w == 1], y_filt[w == 1])
- def predict(self, X, treatment=None, y=None, p=None, return_components=False, verbose=True):
+ def predict(
+ self, X, treatment=None, y=None, p=None, return_components=False, verbose=True
+ ):
"""Predict treatment effects.
Args:
@@ -119,7 +131,7 @@ def predict(self, X, treatment=None, y=None, p=None, return_components=False, ve
yhat[w == 0] = yhat_cs[group][mask][w == 0]
yhat[w == 1] = yhat_ts[group][mask][w == 1]
- logger.info('Error metrics for group {}'.format(group))
+ logger.info("Error metrics for group {}".format(group))
regression_metrics(y_filt, yhat, w)
te = np.zeros((X.shape[0], self.t_groups.shape[0]))
@@ -131,8 +143,18 @@ def predict(self, X, treatment=None, y=None, p=None, return_components=False, ve
else:
return te, yhat_cs, yhat_ts
- def fit_predict(self, X, treatment, y, p=None, return_ci=False, n_bootstraps=1000, bootstrap_size=10000,
- return_components=False, verbose=True):
+ def fit_predict(
+ self,
+ X,
+ treatment,
+ y,
+ p=None,
+ return_ci=False,
+ n_bootstraps=1000,
+ bootstrap_size=10000,
+ return_components=False,
+ verbose=True,
+ ):
"""Fit the inference model of the T learner and predict treatment effects.
Args:
@@ -160,15 +182,19 @@ def fit_predict(self, X, treatment, y, p=None, return_ci=False, n_bootstraps=100
_classes_global = self._classes
models_c_global = deepcopy(self.models_c)
models_t_global = deepcopy(self.models_t)
- te_bootstraps = np.zeros(shape=(X.shape[0], self.t_groups.shape[0], n_bootstraps))
+ te_bootstraps = np.zeros(
+ shape=(X.shape[0], self.t_groups.shape[0], n_bootstraps)
+ )
- logger.info('Bootstrap Confidence Intervals')
+ logger.info("Bootstrap Confidence Intervals")
for i in tqdm(range(n_bootstraps)):
te_b = self.bootstrap(X, treatment, y, size=bootstrap_size)
te_bootstraps[:, :, i] = te_b
- te_lower = np.percentile(te_bootstraps, (self.ate_alpha/2)*100, axis=2)
- te_upper = np.percentile(te_bootstraps, (1 - self.ate_alpha / 2) * 100, axis=2)
+ te_lower = np.percentile(te_bootstraps, (self.ate_alpha / 2) * 100, axis=2)
+ te_upper = np.percentile(
+ te_bootstraps, (1 - self.ate_alpha / 2) * 100, axis=2
+ )
# set member variables back to global (currently last bootstrapped outcome)
self.t_groups = t_groups_global
@@ -178,7 +204,16 @@ def fit_predict(self, X, treatment, y, p=None, return_ci=False, n_bootstraps=100
return (te, te_lower, te_upper)
- def estimate_ate(self, X, treatment, y, p=None, bootstrap_ci=False, n_bootstraps=1000, bootstrap_size=10000):
+ def estimate_ate(
+ self,
+ X,
+ treatment,
+ y,
+ p=None,
+ bootstrap_ci=False,
+ n_bootstraps=1000,
+ bootstrap_size=10000,
+ ):
"""Estimate the Average Treatment Effect (ATE).
Args:
@@ -210,13 +245,14 @@ def estimate_ate(self, X, treatment, y, p=None, bootstrap_ci=False, n_bootstraps
yhat_c = yhat_cs[group][mask]
yhat_t = yhat_ts[group][mask]
- se = np.sqrt((
- (y_filt[w == 0] - yhat_c[w == 0]).var()
- / (1 - prob_treatment) +
- (y_filt[w == 1] - yhat_t[w == 1]).var()
- / prob_treatment +
- (yhat_t - yhat_c).var()
- ) / y_filt.shape[0])
+ se = np.sqrt(
+ (
+ (y_filt[w == 0] - yhat_c[w == 0]).var() / (1 - prob_treatment)
+ + (y_filt[w == 1] - yhat_t[w == 1]).var() / prob_treatment
+ + (yhat_t - yhat_c).var()
+ )
+ / y_filt.shape[0]
+ )
_ate_lb = _ate - se * norm.ppf(1 - self.ate_alpha / 2)
_ate_ub = _ate + se * norm.ppf(1 - self.ate_alpha / 2)
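Reading aid (not part of the diff): the reformatted expression above is the same plug-in standard error as before, which in notation reads

$$ SE(\hat{\tau}) = \sqrt{\tfrac{1}{n}\Big(\tfrac{\operatorname{Var}(y-\hat{y}_0 \mid w=0)}{1-p} + \tfrac{\operatorname{Var}(y-\hat{y}_1 \mid w=1)}{p} + \operatorname{Var}(\hat{y}_1-\hat{y}_0)\Big)} $$

with $p$ = `prob_treatment`, $n$ = `y_filt.shape[0]`, and $\hat{y}_0, \hat{y}_1$ the control- and treatment-model predictions.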
@@ -233,15 +269,19 @@ def estimate_ate(self, X, treatment, y, p=None, bootstrap_ci=False, n_bootstraps
models_c_global = deepcopy(self.models_c)
models_t_global = deepcopy(self.models_t)
- logger.info('Bootstrap Confidence Intervals for ATE')
+ logger.info("Bootstrap Confidence Intervals for ATE")
ate_bootstraps = np.zeros(shape=(self.t_groups.shape[0], n_bootstraps))
for n in tqdm(range(n_bootstraps)):
ate_b = self.bootstrap(X, treatment, y, size=bootstrap_size)
ate_bootstraps[:, n] = ate_b.mean()
- ate_lower = np.percentile(ate_bootstraps, (self.ate_alpha / 2) * 100, axis=1)
- ate_upper = np.percentile(ate_bootstraps, (1 - self.ate_alpha / 2) * 100, axis=1)
+ ate_lower = np.percentile(
+ ate_bootstraps, (self.ate_alpha / 2) * 100, axis=1
+ )
+ ate_upper = np.percentile(
+ ate_bootstraps, (1 - self.ate_alpha / 2) * 100, axis=1
+ )
# set member variables back to global (currently last bootstrapped outcome)
self.t_groups = t_groups_global
@@ -257,12 +297,14 @@ class BaseTRegressor(BaseTLearner):
A parent class for T-learner regressor classes.
"""
- def __init__(self,
- learner=None,
- control_learner=None,
- treatment_learner=None,
- ate_alpha=.05,
- control_name=0):
+ def __init__(
+ self,
+ learner=None,
+ control_learner=None,
+ treatment_learner=None,
+ ate_alpha=0.05,
+ control_name=0,
+ ):
"""Initialize a T-learner regressor.
Args:
@@ -277,7 +319,8 @@ def __init__(self,
control_learner=control_learner,
treatment_learner=treatment_learner,
ate_alpha=ate_alpha,
- control_name=control_name)
+ control_name=control_name,
+ )
class BaseTClassifier(BaseTLearner):
@@ -285,12 +328,14 @@ class BaseTClassifier(BaseTLearner):
A parent class for T-learner classifier classes.
"""
- def __init__(self,
- learner=None,
- control_learner=None,
- treatment_learner=None,
- ate_alpha=.05,
- control_name=0):
+ def __init__(
+ self,
+ learner=None,
+ control_learner=None,
+ treatment_learner=None,
+ ate_alpha=0.05,
+ control_name=0,
+ ):
"""Initialize a T-learner classifier.
Args:
@@ -305,9 +350,12 @@ def __init__(self,
control_learner=control_learner,
treatment_learner=treatment_learner,
ate_alpha=ate_alpha,
- control_name=control_name)
+ control_name=control_name,
+ )
- def predict(self, X, treatment=None, y=None, p=None, return_components=False, verbose=True):
+ def predict(
+ self, X, treatment=None, y=None, p=None, return_components=False, verbose=True
+ ):
"""Predict treatment effects.
Args:
@@ -337,7 +385,7 @@ def predict(self, X, treatment=None, y=None, p=None, return_components=False, ve
yhat[w == 0] = yhat_cs[group][mask][w == 0]
yhat[w == 1] = yhat_ts[group][mask][w == 1]
- logger.info('Error metrics for group {}'.format(group))
+ logger.info("Error metrics for group {}".format(group))
classification_metrics(y_filt, yhat, w)
te = np.zeros((X.shape[0], self.t_groups.shape[0]))
@@ -351,16 +399,20 @@ def predict(self, X, treatment=None, y=None, p=None, return_components=False, ve
class XGBTRegressor(BaseTRegressor):
- def __init__(self, ate_alpha=.05, control_name=0, *args, **kwargs):
+ def __init__(self, ate_alpha=0.05, control_name=0, *args, **kwargs):
"""Initialize a T-learner with two XGBoost models."""
- super().__init__(learner=XGBRegressor(*args, **kwargs),
- ate_alpha=ate_alpha,
- control_name=control_name)
+ super().__init__(
+ learner=XGBRegressor(*args, **kwargs),
+ ate_alpha=ate_alpha,
+ control_name=control_name,
+ )
class MLPTRegressor(BaseTRegressor):
- def __init__(self, ate_alpha=.05, control_name=0, *args, **kwargs):
+ def __init__(self, ate_alpha=0.05, control_name=0, *args, **kwargs):
"""Initialize a T-learner with two MLP models."""
- super().__init__(learner=MLPRegressor(*args, **kwargs),
- ate_alpha=ate_alpha,
- control_name=control_name)
+ super().__init__(
+ learner=MLPRegressor(*args, **kwargs),
+ ate_alpha=ate_alpha,
+ control_name=control_name,
+ )
diff --git a/causalml/inference/meta/tmle.py b/causalml/inference/meta/tmle.py
index 618aaff0..41e6008e 100644
--- a/causalml/inference/meta/tmle.py
+++ b/causalml/inference/meta/tmle.py
@@ -6,11 +6,15 @@
from scipy.stats import norm
from sklearn.preprocessing import MinMaxScaler
-from causalml.inference.meta.utils import check_treatment_vector, check_p_conditions, convert_pd_to_np
+from causalml.inference.meta.utils import (
+ check_treatment_vector,
+ check_p_conditions,
+ convert_pd_to_np,
+)
from causalml.propensity import calibrate
-logger = logging.getLogger('causalml')
+logger = logging.getLogger("causalml")
def logit_tmle(x, y, a, h0, h1):
@@ -25,8 +29,12 @@ def logit_tmle_grad(x, y, a, h0, h1):
def logit_tmle_hess(x, y, a, h0, h1):
p = expit(a + x[0] * h0 + x[1] * h1)
- return np.array([[np.mean(p * (1 - p) * h0 * h0), np.mean(p * (1 - p) * h0 * h1)],
- [np.mean(p * (1 - p) * h0 * h1), np.mean(p * (1 - p) * h1 * h1)]])
+ return np.array(
+ [
+ [np.mean(p * (1 - p) * h0 * h0), np.mean(p * (1 - p) * h0 * h1)],
+ [np.mean(p * (1 - p) * h0 * h1), np.mean(p * (1 - p) * h1 * h1)],
+ ]
+ )
def simple_tmle(y, w, q0w, q1w, p, alpha=0.0001):
@@ -56,14 +64,31 @@ def simple_tmle(y, w, q0w, q1w, p, alpha=0.0001):
h1 = w / p
h0 = (1 - w) / (1 - p)
- sol = minimize(logit_tmle, np.zeros(2), args=(ystar, intercept, h0, h1),
- method="Newton-CG", jac=logit_tmle_grad, hess=logit_tmle_hess)
-
- qawstar = scaler.inverse_transform(expit(intercept + sol.x[0] * h0 + sol.x[1] * h1).reshape(-1, 1)).flatten()
- q0star = scaler.inverse_transform(expit(logit(q0) + sol.x[0] / (1 - p)).reshape(-1, 1)).flatten()
- q1star = scaler.inverse_transform(expit(logit(q1) + sol.x[1] / p).reshape(-1, 1)).flatten()
-
- ic = (w / p - (1 - w) / (1 - p)) * (y - qawstar) + q1star - q0star - np.mean(q1star - q0star)
+ sol = minimize(
+ logit_tmle,
+ np.zeros(2),
+ args=(ystar, intercept, h0, h1),
+ method="Newton-CG",
+ jac=logit_tmle_grad,
+ hess=logit_tmle_hess,
+ )
+
+ qawstar = scaler.inverse_transform(
+ expit(intercept + sol.x[0] * h0 + sol.x[1] * h1).reshape(-1, 1)
+ ).flatten()
+ q0star = scaler.inverse_transform(
+ expit(logit(q0) + sol.x[0] / (1 - p)).reshape(-1, 1)
+ ).flatten()
+ q1star = scaler.inverse_transform(
+ expit(logit(q1) + sol.x[1] / p).reshape(-1, 1)
+ ).flatten()
+
+ ic = (
+ (w / p - (1 - w) / (1 - p)) * (y - qawstar)
+ + q1star
+ - q0star
+ - np.mean(q1star - q0star)
+ )
return np.mean(q1star - q0star), np.sqrt(np.var(ic) / np.size(y))
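Reading aid (not part of the diff): `simple_tmle` returns the targeted estimate and its influence-curve standard error,

$$ \widehat{ATE}=\frac{1}{n}\sum_i\big(Q_1^*(x_i)-Q_0^*(x_i)\big), \qquad SE=\sqrt{\operatorname{Var}(IC)/n}, $$
$$ IC_i=\Big(\frac{w_i}{p_i}-\frac{1-w_i}{1-p_i}\Big)\big(y_i-Q^*_{w_i}(x_i)\big)+Q_1^*(x_i)-Q_0^*(x_i)-\widehat{ATE}, $$

where $Q_0^*, Q_1^*$ are the fluctuated (targeted) outcome predictions computed just above.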
@@ -74,7 +99,14 @@ class TMLELearner(object):
Ref: Gruber, S., & Van Der Laan, M. J. (2009). Targeted maximum likelihood estimation: A gentle introduction.
"""
- def __init__(self, learner, ate_alpha=.05, control_name=0, cv=None, calibrate_propensity=True):
+ def __init__(
+ self,
+ learner,
+ ate_alpha=0.05,
+ control_name=0,
+ cv=None,
+ calibrate_propensity=True,
+ ):
"""Initialize a TMLE learner.
Args:
@@ -90,7 +122,9 @@ def __init__(self, learner, ate_alpha=.05, control_name=0, cv=None, calibrate_pr
self.calibrate_propensity = calibrate_propensity
def __repr__(self):
- return '{}(model={}, cv={})'.format(self.__class__.__name__, self.model_tau.__repr__(), self.cv)
+ return "{}(model={}, cv={})".format(
+ self.__class__.__name__, self.model_tau.__repr__(), self.cv
+ )
def estimate_ate(self, X, treatment, y, p, segment=None, return_ci=False):
"""Estimate the Average Treatment Effect (ATE).
@@ -118,30 +152,38 @@ def estimate_ate(self, X, treatment, y, p, segment=None, return_ci=False):
treatment_name = self.t_groups[0]
p = {treatment_name: convert_pd_to_np(p)}
elif isinstance(p, dict):
- p = {treatment_name: convert_pd_to_np(_p) for treatment_name, _p in p.items()}
+ p = {
+ treatment_name: convert_pd_to_np(_p) for treatment_name, _p in p.items()
+ }
ate = []
ate_lb = []
ate_ub = []
for i, group in enumerate(self.t_groups):
- logger.info('Estimating ATE for group {}.'.format(group))
+ logger.info("Estimating ATE for group {}.".format(group))
w_group = (treatment == group).astype(int)
p_group = p[group]
if self.calibrate_propensity:
- logger.info('Calibrating propensity scores.')
+ logger.info("Calibrating propensity scores.")
p_group = calibrate(p_group, w_group)
yhat_c = np.zeros_like(y, dtype=float)
yhat_t = np.zeros_like(y, dtype=float)
if self.cv:
for i_fold, (i_trn, i_val) in enumerate(self.cv.split(X, y), 1):
- logger.info('Training an outcome model for CV #{}'.format(i_fold))
- self.model_tau.fit(np.hstack((X[i_trn], w_group[i_trn].reshape(-1, 1))), y[i_trn])
-
- yhat_c[i_val] = self.model_tau.predict(np.hstack((X[i_val], np.zeros((len(i_val), 1)))))
- yhat_t[i_val] = self.model_tau.predict(np.hstack((X[i_val], np.ones((len(i_val), 1)))))
+ logger.info("Training an outcome model for CV #{}".format(i_fold))
+ self.model_tau.fit(
+ np.hstack((X[i_trn], w_group[i_trn].reshape(-1, 1))), y[i_trn]
+ )
+
+ yhat_c[i_val] = self.model_tau.predict(
+ np.hstack((X[i_val], np.zeros((len(i_val), 1))))
+ )
+ yhat_t[i_val] = self.model_tau.predict(
+ np.hstack((X[i_val], np.ones((len(i_val), 1))))
+ )
else:
self.model_tau.fit(np.hstack((X, w_group.reshape(-1, 1))), y)
@@ -150,21 +192,29 @@ def estimate_ate(self, X, treatment, y, p, segment=None, return_ci=False):
yhat_t = self.model_tau.predict(np.hstack((X, np.ones((len(y), 1)))))
if segment is None:
- logger.info('Training the TMLE learner.')
+ logger.info("Training the TMLE learner.")
_ate, se = simple_tmle(y, w_group, yhat_c, yhat_t, p_group)
_ate_lb = _ate - se * norm.ppf(1 - self.ate_alpha / 2)
_ate_ub = _ate + se * norm.ppf(1 - self.ate_alpha / 2)
else:
- assert segment.shape[0] == X.shape[0] and segment.ndim == 1, 'Segment must be the 1-d np.array of int.'
+ assert (
+ segment.shape[0] == X.shape[0] and segment.ndim == 1
+ ), "Segment must be the 1-d np.array of int."
segments = np.unique(segment)
_ate = []
_ate_lb = []
_ate_ub = []
for s in sorted(segments):
- logger.info('Training the TMLE learner for segment {}.'.format(s))
- filt = (segment == s) & (yhat_c < np.quantile(yhat_c, q=.99))
- _ate_s, se = simple_tmle(y[filt], w_group[filt], yhat_c[filt], yhat_t[filt], p_group[filt])
+ logger.info("Training the TMLE learner for segment {}.".format(s))
+ filt = (segment == s) & (yhat_c < np.quantile(yhat_c, q=0.99))
+ _ate_s, se = simple_tmle(
+ y[filt],
+ w_group[filt],
+ yhat_c[filt],
+ yhat_t[filt],
+ p_group[filt],
+ )
_ate_lb_s = _ate_s - se * norm.ppf(1 - self.ate_alpha / 2)
_ate_ub_s = _ate_s + se * norm.ppf(1 - self.ate_alpha / 2)
diff --git a/causalml/inference/meta/utils.py b/causalml/inference/meta/utils.py
index 895348e2..51b87424 100644
--- a/causalml/inference/meta/utils.py
+++ b/causalml/inference/meta/utils.py
@@ -12,40 +12,51 @@ def convert_pd_to_np(*args):
def check_treatment_vector(treatment, control_name=None):
n_unique_treatments = np.unique(treatment).shape[0]
- assert n_unique_treatments > 1, \
- 'Treatment vector must have at least two levels.'
+ assert n_unique_treatments > 1, "Treatment vector must have at least two levels."
if control_name is not None:
- assert control_name in treatment, \
- 'Control group level {} not found in treatment vector.'.format(control_name)
+ assert (
+ control_name in treatment
+ ), "Control group level {} not found in treatment vector.".format(control_name)
def check_p_conditions(p, t_groups):
eps = np.finfo(float).eps
- assert isinstance(p, (np.ndarray, pd.Series, dict)), \
- 'p must be an np.ndarray, pd.Series, or dict type'
+ assert isinstance(
+ p, (np.ndarray, pd.Series, dict)
+ ), "p must be an np.ndarray, pd.Series, or dict type"
if isinstance(p, (np.ndarray, pd.Series)):
- assert t_groups.shape[0] == 1, \
- 'If p is passed as an np.ndarray, there must be only 1 unique non-control group in the treatment vector.'
- assert (0 + eps < p).all() and (p < 1 - eps).all(), \
- 'The values of p should lie within the (0, 1) interval.'
+ assert (
+ t_groups.shape[0] == 1
+ ), "If p is passed as an np.ndarray, there must be only 1 unique non-control group in the treatment vector."
+ assert (0 + eps < p).all() and (
+ p < 1 - eps
+ ).all(), "The values of p should lie within the (0, 1) interval."
if isinstance(p, dict):
for t_name in t_groups:
- assert (0 + eps < p[t_name]).all() and (p[t_name] < 1 - eps).all(), \
- 'The values of p should lie within the (0, 1) interval.'
+ assert (0 + eps < p[t_name]).all() and (
+ p[t_name] < 1 - eps
+ ).all(), "The values of p should lie within the (0, 1) interval."
def check_explain_conditions(method, models, X=None, treatment=None, y=None):
- valid_methods = ['gini', 'permutation', 'shapley']
- assert method in valid_methods, 'Current supported methods: {}'.format(', '.join(valid_methods))
+ valid_methods = ["gini", "permutation", "shapley"]
+ assert method in valid_methods, "Current supported methods: {}".format(
+ ", ".join(valid_methods)
+ )
- if method in ('gini', 'shapley'):
+ if method in ("gini", "shapley"):
conds = [hasattr(mod, "feature_importances_") for mod in models]
- assert all(conds), "Both models must have .feature_importances_ attribute if method = {}".format(method)
+ assert all(
+ conds
+ ), "Both models must have .feature_importances_ attribute if method = {}".format(
+ method
+ )
- if method in ('permutation', 'shapley'):
- assert all(arr is not None for arr in (X, treatment, y)), \
- "X, treatment, and y must be provided if method = {}".format(method)
+ if method in ("permutation", "shapley"):
+ assert all(
+ arr is not None for arr in (X, treatment, y)
+ ), "X, treatment, and y must be provided if method = {}".format(method)
def clean_xgboost_objective(objective):
@@ -62,9 +73,9 @@ def clean_xgboost_objective(objective):
-------
The translated objective, or original if no translation was required.
"""
- compat_before_v83 = {'reg:squarederror': 'reg:linear'}
- compat_v83_or_later = {'reg:linear': 'reg:squarederror'}
- if version.parse(xgboost_version) < version.parse('0.83'):
+ compat_before_v83 = {"reg:squarederror": "reg:linear"}
+ compat_v83_or_later = {"reg:linear": "reg:squarederror"}
+ if version.parse(xgboost_version) < version.parse("0.83"):
if objective in compat_before_v83:
objective = compat_before_v83[objective]
else:
@@ -87,20 +98,25 @@ def get_xgboost_objective_metric(objective):
-------
A tuple with the translated objective and evaluation metric.
"""
+
def clean_dict_keys(orig):
return {clean_xgboost_objective(k): v for (k, v) in orig.items()}
- metric_mapping = clean_dict_keys({
- 'rank:pairwise': 'auc',
- 'reg:squarederror': 'rmse',
- })
+ metric_mapping = clean_dict_keys(
+ {
+ "rank:pairwise": "auc",
+ "reg:squarederror": "rmse",
+ }
+ )
objective = clean_xgboost_objective(objective)
- assert (objective in metric_mapping), \
- 'Effect learner objective must be one of: ' + ", ".join(metric_mapping)
+ assert (
+ objective in metric_mapping
+ ), "Effect learner objective must be one of: " + ", ".join(metric_mapping)
return objective, metric_mapping[objective]
+
def get_weighted_variance(x, sample_weight):
"""
Calculate the variance of array x with sample_weight.
@@ -119,6 +135,5 @@ def get_weighted_variance(x, sample_weight):
The variance of x with sample weight
"""
average = np.average(x, weights=sample_weight)
- variance = np.average((x-average)**2, weights=sample_weight)
+ variance = np.average((x - average) ** 2, weights=sample_weight)
return variance
-
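A minimal usage sketch of the two helpers reformatted above (illustrative only; the toy data are made up and the import path follows this file's diff header):

```python
import numpy as np

from causalml.inference.meta.utils import (
    get_weighted_variance,
    get_xgboost_objective_metric,
)

x = np.array([1.0, 2.0, 3.0, 4.0])
w = np.array([1.0, 1.0, 2.0, 2.0])

# Variance of x around its weighted mean, with w as sample weights.
print(get_weighted_variance(x, w))

# Translates the objective to the alias supported by the installed xgboost
# version and returns the matching evaluation metric, e.g. ("reg:squarederror", "rmse").
print(get_xgboost_objective_metric("reg:squarederror"))
```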
diff --git a/causalml/inference/meta/xlearner.py b/causalml/inference/meta/xlearner.py
index 302c5e1d..38f37a20 100644
--- a/causalml/inference/meta/xlearner.py
+++ b/causalml/inference/meta/xlearner.py
@@ -6,12 +6,16 @@
from scipy.stats import norm
from causalml.inference.meta.base import BaseLearner
-from causalml.inference.meta.utils import check_treatment_vector, check_p_conditions, convert_pd_to_np
+from causalml.inference.meta.utils import (
+ check_treatment_vector,
+ check_p_conditions,
+ convert_pd_to_np,
+)
from causalml.inference.meta.explainer import Explainer
from causalml.metrics import regression_metrics, classification_metrics
from causalml.propensity import compute_propensity_score
-logger = logging.getLogger('causalml')
+logger = logging.getLogger("causalml")
class BaseXLearner(BaseLearner):
@@ -22,14 +26,16 @@ class BaseXLearner(BaseLearner):
Details of X-learner are available at Kunzel et al. (2018) (https://arxiv.org/abs/1706.03461).
"""
- def __init__(self,
- learner=None,
- control_outcome_learner=None,
- treatment_outcome_learner=None,
- control_effect_learner=None,
- treatment_effect_learner=None,
- ate_alpha=.05,
- control_name=0):
+ def __init__(
+ self,
+ learner=None,
+ control_outcome_learner=None,
+ treatment_outcome_learner=None,
+ control_effect_learner=None,
+ treatment_effect_learner=None,
+ ate_alpha=0.05,
+ control_name=0,
+ ):
"""Initialize a X-learner.
Args:
@@ -42,10 +48,12 @@ def __init__(self,
ate_alpha (float, optional): the confidence level alpha of the ATE estimate
control_name (str or int, optional): name of control group
"""
- assert (learner is not None) or ((control_outcome_learner is not None) and
- (treatment_outcome_learner is not None) and
- (control_effect_learner is not None) and
- (treatment_effect_learner is not None))
+ assert (learner is not None) or (
+ (control_outcome_learner is not None)
+ and (treatment_outcome_learner is not None)
+ and (control_effect_learner is not None)
+ and (treatment_effect_learner is not None)
+ )
if control_outcome_learner is None:
self.model_mu_c = deepcopy(learner)
@@ -74,14 +82,18 @@ def __init__(self,
self.propensity_model = None
def __repr__(self):
- return ('{}(control_outcome_learner={},\n'
- '\ttreatment_outcome_learner={},\n'
- '\tcontrol_effect_learner={},\n'
- '\ttreatment_effect_learner={})'.format(self.__class__.__name__,
- self.model_mu_c.__repr__(),
- self.model_mu_t.__repr__(),
- self.model_tau_c.__repr__(),
- self.model_tau_t.__repr__()))
+ return (
+ "{}(control_outcome_learner={},\n"
+ "\ttreatment_outcome_learner={},\n"
+ "\tcontrol_effect_learner={},\n"
+ "\ttreatment_effect_learner={})".format(
+ self.__class__.__name__,
+ self.model_mu_c.__repr__(),
+ self.model_mu_t.__repr__(),
+ self.model_tau_c.__repr__(),
+ self.model_tau_t.__repr__(),
+ )
+ )
def fit(self, X, treatment, y, p=None):
"""Fit the inference model.
@@ -108,8 +120,12 @@ def fit(self, X, treatment, y, p=None):
self._classes = {group: i for i, group in enumerate(self.t_groups)}
self.models_mu_c = {group: deepcopy(self.model_mu_c) for group in self.t_groups}
self.models_mu_t = {group: deepcopy(self.model_mu_t) for group in self.t_groups}
- self.models_tau_c = {group: deepcopy(self.model_tau_c) for group in self.t_groups}
- self.models_tau_t = {group: deepcopy(self.model_tau_t) for group in self.t_groups}
+ self.models_tau_c = {
+ group: deepcopy(self.model_tau_c) for group in self.t_groups
+ }
+ self.models_tau_t = {
+ group: deepcopy(self.model_tau_t) for group in self.t_groups
+ }
self.vars_c = {}
self.vars_t = {}
@@ -125,9 +141,13 @@ def fit(self, X, treatment, y, p=None):
self.models_mu_t[group].fit(X_filt[w == 1], y_filt[w == 1])
# Calculate variances and treatment effects
- var_c = (y_filt[w == 0] - self.models_mu_c[group].predict(X_filt[w == 0])).var()
+ var_c = (
+ y_filt[w == 0] - self.models_mu_c[group].predict(X_filt[w == 0])
+ ).var()
self.vars_c[group] = var_c
- var_t = (y_filt[w == 1] - self.models_mu_t[group].predict(X_filt[w == 1])).var()
+ var_t = (
+ y_filt[w == 1] - self.models_mu_t[group].predict(X_filt[w == 1])
+ ).var()
self.vars_t[group] = var_t
# Train treatment models
@@ -136,8 +156,9 @@ def fit(self, X, treatment, y, p=None):
self.models_tau_c[group].fit(X_filt[w == 0], d_c)
self.models_tau_t[group].fit(X_filt[w == 1], d_t)
- def predict(self, X, treatment=None, y=None, p=None, return_components=False,
- verbose=True):
+ def predict(
+ self, X, treatment=None, y=None, p=None, return_components=False, verbose=True
+ ):
"""Predict treatment effects.
Args:
@@ -155,7 +176,7 @@ def predict(self, X, treatment=None, y=None, p=None, return_components=False,
X, treatment, y = convert_pd_to_np(X, treatment, y)
if p is None:
- logger.info('Generating propensity score')
+ logger.info("Generating propensity score")
p = dict()
for group in self.t_groups:
p_model = self.propensity_model[group]
@@ -173,7 +194,9 @@ def predict(self, X, treatment=None, y=None, p=None, return_components=False,
dhat_cs[group] = model_tau_c.predict(X)
dhat_ts[group] = model_tau_t.predict(X)
- _te = (p[group] * dhat_cs[group] + (1 - p[group]) * dhat_ts[group]).reshape(-1, 1)
+ _te = (p[group] * dhat_cs[group] + (1 - p[group]) * dhat_ts[group]).reshape(
+ -1, 1
+ )
te[:, i] = np.ravel(_te)
if (y is not None) and (treatment is not None) and verbose:
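Reading aid (not part of the diff): the reshaped line above is the usual X-learner combination of the two imputed-effect models, weighted by the propensity score,

$$ \hat\tau(x) = g(x)\,\hat\tau_0(x) + \big(1-g(x)\big)\,\hat\tau_1(x), $$

with $g(x)$ = `p[group]`, $\hat\tau_0$ = `dhat_cs[group]` (control-side model) and $\hat\tau_1$ = `dhat_ts[group]` (treatment-side model).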
@@ -187,7 +210,7 @@ def predict(self, X, treatment=None, y=None, p=None, return_components=False,
yhat[w == 0] = self.models_mu_c[group].predict(X_filt[w == 0])
yhat[w == 1] = self.models_mu_t[group].predict(X_filt[w == 1])
- logger.info('Error metrics for group {}'.format(group))
+ logger.info("Error metrics for group {}".format(group))
regression_metrics(y_filt, yhat, w)
if not return_components:
@@ -195,8 +218,18 @@ def predict(self, X, treatment=None, y=None, p=None, return_components=False,
else:
return te, dhat_cs, dhat_ts
- def fit_predict(self, X, treatment, y, p=None, return_ci=False, n_bootstraps=1000, bootstrap_size=10000,
- return_components=False, verbose=True):
+ def fit_predict(
+ self,
+ X,
+ treatment,
+ y,
+ p=None,
+ return_ci=False,
+ n_bootstraps=1000,
+ bootstrap_size=10000,
+ return_components=False,
+ verbose=True,
+ ):
"""Fit the treatment effect and outcome models of the R learner and predict treatment effects.
Args:
@@ -224,7 +257,9 @@ def fit_predict(self, X, treatment, y, p=None, return_ci=False, n_bootstraps=100
else:
p = self._format_p(p, self.t_groups)
- te = self.predict(X, treatment=treatment, y=y, p=p, return_components=return_components)
+ te = self.predict(
+ X, treatment=treatment, y=y, p=p, return_components=return_components
+ )
if not return_ci:
return te
@@ -235,15 +270,19 @@ def fit_predict(self, X, treatment, y, p=None, return_ci=False, n_bootstraps=100
models_mu_t_global = deepcopy(self.models_mu_t)
models_tau_c_global = deepcopy(self.models_tau_c)
models_tau_t_global = deepcopy(self.models_tau_t)
- te_bootstraps = np.zeros(shape=(X.shape[0], self.t_groups.shape[0], n_bootstraps))
+ te_bootstraps = np.zeros(
+ shape=(X.shape[0], self.t_groups.shape[0], n_bootstraps)
+ )
- logger.info('Bootstrap Confidence Intervals')
+ logger.info("Bootstrap Confidence Intervals")
for i in tqdm(range(n_bootstraps)):
te_b = self.bootstrap(X, treatment, y, p, size=bootstrap_size)
te_bootstraps[:, :, i] = te_b
te_lower = np.percentile(te_bootstraps, (self.ate_alpha / 2) * 100, axis=2)
- te_upper = np.percentile(te_bootstraps, (1 - self.ate_alpha / 2) * 100, axis=2)
+ te_upper = np.percentile(
+ te_bootstraps, (1 - self.ate_alpha / 2) * 100, axis=2
+ )
# set member variables back to global (currently last bootstrapped outcome)
self.t_groups = t_groups_global
@@ -255,7 +294,16 @@ def fit_predict(self, X, treatment, y, p=None, return_ci=False, n_bootstraps=100
return (te, te_lower, te_upper)
- def estimate_ate(self, X, treatment, y, p=None, bootstrap_ci=False, n_bootstraps=1000, bootstrap_size=10000):
+ def estimate_ate(
+ self,
+ X,
+ treatment,
+ y,
+ p=None,
+ bootstrap_ci=False,
+ n_bootstraps=1000,
+ bootstrap_size=10000,
+ ):
"""Estimate the Average Treatment Effect (ATE).
Args:
@@ -271,7 +319,9 @@ def estimate_ate(self, X, treatment, y, p=None, bootstrap_ci=False, n_bootstraps
Returns:
The mean and confidence interval (LB, UB) of the ATE estimate.
"""
- te, dhat_cs, dhat_ts = self.fit_predict(X, treatment, y, p, return_components=True)
+ te, dhat_cs, dhat_ts = self.fit_predict(
+ X, treatment, y, p, return_components=True
+ )
X, treatment, y = convert_pd_to_np(X, treatment, y)
if p is None:
@@ -297,10 +347,14 @@ def estimate_ate(self, X, treatment, y, p=None, bootstrap_ci=False, n_bootstraps
# SE formula is based on the lower bound formula (7) from Imbens, Guido W., and Jeffrey M. Wooldridge. 2009.
# "Recent Developments in the Econometrics of Program Evaluation." Journal of Economic Literature
- se = np.sqrt((
- self.vars_t[group] / prob_treatment + self.vars_c[group] / (1 - prob_treatment) +
- (p_filt * dhat_c + (1 - p_filt) * dhat_t).var()
- ) / w.shape[0])
+ se = np.sqrt(
+ (
+ self.vars_t[group] / prob_treatment
+ + self.vars_c[group] / (1 - prob_treatment)
+ + (p_filt * dhat_c + (1 - p_filt) * dhat_t).var()
+ )
+ / w.shape[0]
+ )
_ate_lb = _ate - se * norm.ppf(1 - self.ate_alpha / 2)
_ate_ub = _ate + se * norm.ppf(1 - self.ate_alpha / 2)
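Reading aid (not part of the diff): following the Imbens and Wooldridge lower-bound formula cited in the comment, the reformatted expression computes

$$ SE = \sqrt{\tfrac{1}{n}\Big(\tfrac{\sigma_t^2}{p} + \tfrac{\sigma_c^2}{1-p} + \operatorname{Var}\big(g\,\hat\tau_0 + (1-g)\,\hat\tau_1\big)\Big)} $$

where $\sigma_c^2, \sigma_t^2$ are the outcome-model residual variances stored during `fit` (`self.vars_c`, `self.vars_t`), $p$ = `prob_treatment`, and $g$ = `p_filt`.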
@@ -319,15 +373,19 @@ def estimate_ate(self, X, treatment, y, p=None, bootstrap_ci=False, n_bootstraps
models_tau_c_global = deepcopy(self.models_tau_c)
models_tau_t_global = deepcopy(self.models_tau_t)
- logger.info('Bootstrap Confidence Intervals for ATE')
+ logger.info("Bootstrap Confidence Intervals for ATE")
ate_bootstraps = np.zeros(shape=(self.t_groups.shape[0], n_bootstraps))
for n in tqdm(range(n_bootstraps)):
cate_b = self.bootstrap(X, treatment, y, p, size=bootstrap_size)
ate_bootstraps[:, n] = cate_b.mean()
- ate_lower = np.percentile(ate_bootstraps, (self.ate_alpha / 2) * 100, axis=1)
- ate_upper = np.percentile(ate_bootstraps, (1 - self.ate_alpha / 2) * 100, axis=1)
+ ate_lower = np.percentile(
+ ate_bootstraps, (self.ate_alpha / 2) * 100, axis=1
+ )
+ ate_upper = np.percentile(
+ ate_bootstraps, (1 - self.ate_alpha / 2) * 100, axis=1
+ )
# set member variables back to global (currently last bootstrapped outcome)
self.t_groups = t_groups_global
@@ -344,14 +402,16 @@ class BaseXRegressor(BaseXLearner):
A parent class for X-learner regressor classes.
"""
- def __init__(self,
- learner=None,
- control_outcome_learner=None,
- treatment_outcome_learner=None,
- control_effect_learner=None,
- treatment_effect_learner=None,
- ate_alpha=.05,
- control_name=0):
+ def __init__(
+ self,
+ learner=None,
+ control_outcome_learner=None,
+ treatment_outcome_learner=None,
+ control_effect_learner=None,
+ treatment_effect_learner=None,
+ ate_alpha=0.05,
+ control_name=0,
+ ):
"""Initialize an X-learner regressor.
Args:
@@ -371,7 +431,8 @@ def __init__(self,
control_effect_learner=control_effect_learner,
treatment_effect_learner=treatment_effect_learner,
ate_alpha=ate_alpha,
- control_name=control_name)
+ control_name=control_name,
+ )
class BaseXClassifier(BaseXLearner):
@@ -379,15 +440,17 @@ class BaseXClassifier(BaseXLearner):
A parent class for X-learner classifier classes.
"""
- def __init__(self,
- outcome_learner=None,
- effect_learner=None,
- control_outcome_learner=None,
- treatment_outcome_learner=None,
- control_effect_learner=None,
- treatment_effect_learner=None,
- ate_alpha=.05,
- control_name=0):
+ def __init__(
+ self,
+ outcome_learner=None,
+ effect_learner=None,
+ control_outcome_learner=None,
+ treatment_outcome_learner=None,
+ control_effect_learner=None,
+ treatment_effect_learner=None,
+ ate_alpha=0.05,
+ control_name=0,
+ ):
"""Initialize an X-learner classifier.
Args:
@@ -420,11 +483,15 @@ def __init__(self,
control_effect_learner=control_effect_learner,
treatment_effect_learner=treatment_effect_learner,
ate_alpha=ate_alpha,
- control_name=control_name)
+ control_name=control_name,
+ )
- if ((control_outcome_learner is None) or (treatment_outcome_learner is None)) and (
- (control_effect_learner is None) or (treatment_effect_learner is None)):
- raise ValueError("Either the outcome learner or the effect learner pair must be specified.")
+ if (
+ (control_outcome_learner is None) or (treatment_outcome_learner is None)
+ ) and ((control_effect_learner is None) or (treatment_effect_learner is None)):
+ raise ValueError(
+ "Either the outcome learner or the effect learner pair must be specified."
+ )
def fit(self, X, treatment, y, p=None):
"""Fit the inference model.
@@ -451,8 +518,12 @@ def fit(self, X, treatment, y, p=None):
self._classes = {group: i for i, group in enumerate(self.t_groups)}
self.models_mu_c = {group: deepcopy(self.model_mu_c) for group in self.t_groups}
self.models_mu_t = {group: deepcopy(self.model_mu_t) for group in self.t_groups}
- self.models_tau_c = {group: deepcopy(self.model_tau_c) for group in self.t_groups}
- self.models_tau_t = {group: deepcopy(self.model_tau_t) for group in self.t_groups}
+ self.models_tau_c = {
+ group: deepcopy(self.model_tau_c) for group in self.t_groups
+ }
+ self.models_tau_t = {
+ group: deepcopy(self.model_tau_t) for group in self.t_groups
+ }
self.vars_c = {}
self.vars_t = {}
@@ -468,19 +539,32 @@ def fit(self, X, treatment, y, p=None):
self.models_mu_t[group].fit(X_filt[w == 1], y_filt[w == 1])
# Calculate variances and treatment effects
- var_c = (y_filt[w == 0] - self.models_mu_c[group].predict_proba(X_filt[w == 0])[:, 1]).var()
+ var_c = (
+ y_filt[w == 0]
+ - self.models_mu_c[group].predict_proba(X_filt[w == 0])[:, 1]
+ ).var()
self.vars_c[group] = var_c
- var_t = (y_filt[w == 1] - self.models_mu_t[group].predict_proba(X_filt[w == 1])[:, 1]).var()
+ var_t = (
+ y_filt[w == 1]
+ - self.models_mu_t[group].predict_proba(X_filt[w == 1])[:, 1]
+ ).var()
self.vars_t[group] = var_t
# Train treatment models
- d_c = self.models_mu_t[group].predict_proba(X_filt[w == 0])[:, 1] - y_filt[w == 0]
- d_t = y_filt[w == 1] - self.models_mu_c[group].predict_proba(X_filt[w == 1])[:, 1]
+ d_c = (
+ self.models_mu_t[group].predict_proba(X_filt[w == 0])[:, 1]
+ - y_filt[w == 0]
+ )
+ d_t = (
+ y_filt[w == 1]
+ - self.models_mu_c[group].predict_proba(X_filt[w == 1])[:, 1]
+ )
self.models_tau_c[group].fit(X_filt[w == 0], d_c)
self.models_tau_t[group].fit(X_filt[w == 1], d_t)
- def predict(self, X, treatment=None, y=None, p=None, return_components=False,
- verbose=True):
+ def predict(
+ self, X, treatment=None, y=None, p=None, return_components=False, verbose=True
+ ):
"""Predict treatment effects.
Args:
@@ -499,7 +583,7 @@ def predict(self, X, treatment=None, y=None, p=None, return_components=False,
X, treatment, y = convert_pd_to_np(X, treatment, y)
if p is None:
- logger.info('Generating propensity score')
+ logger.info("Generating propensity score")
p = dict()
for group in self.t_groups:
p_model = self.propensity_model[group]
@@ -517,7 +601,9 @@ def predict(self, X, treatment=None, y=None, p=None, return_components=False,
dhat_cs[group] = model_tau_c.predict(X)
dhat_ts[group] = model_tau_t.predict(X)
- _te = (p[group] * dhat_cs[group] + (1 - p[group]) * dhat_ts[group]).reshape(-1, 1)
+ _te = (p[group] * dhat_cs[group] + (1 - p[group]) * dhat_ts[group]).reshape(
+ -1, 1
+ )
te[:, i] = np.ravel(_te)
if (y is not None) and (treatment is not None) and verbose:
@@ -528,10 +614,14 @@ def predict(self, X, treatment=None, y=None, p=None, return_components=False,
w = (treatment_filt == group).astype(int)
yhat = np.zeros_like(y_filt, dtype=float)
- yhat[w == 0] = self.models_mu_c[group].predict_proba(X_filt[w == 0])[:, 1]
- yhat[w == 1] = self.models_mu_t[group].predict_proba(X_filt[w == 1])[:, 1]
-
- logger.info('Error metrics for group {}'.format(group))
+ yhat[w == 0] = self.models_mu_c[group].predict_proba(X_filt[w == 0])[
+ :, 1
+ ]
+ yhat[w == 1] = self.models_mu_t[group].predict_proba(X_filt[w == 1])[
+ :, 1
+ ]
+
+ logger.info("Error metrics for group {}".format(group))
classification_metrics(y_filt, yhat, w)
if not return_components:
diff --git a/causalml/inference/nn/cevae.py b/causalml/inference/nn/cevae.py
index 83de4019..3e7b0f79 100644
--- a/causalml/inference/nn/cevae.py
+++ b/causalml/inference/nn/cevae.py
@@ -35,8 +35,19 @@
class CEVAE:
- def __init__(self, outcome_dist="studentt", latent_dim=20, hidden_dim=200, num_epochs=50, num_layers=3,
- batch_size=100, learning_rate=1e-3, learning_rate_decay=0.1, num_samples=1000, weight_decay=1e-4):
+ def __init__(
+ self,
+ outcome_dist="studentt",
+ latent_dim=20,
+ hidden_dim=200,
+ num_epochs=50,
+ num_layers=3,
+ batch_size=100,
+ learning_rate=1e-3,
+ learning_rate_decay=0.1,
+ num_samples=1000,
+ weight_decay=1e-4,
+ ):
"""
Initializes CEVAE.
@@ -78,20 +89,24 @@ def fit(self, X, treatment, y, p=None):
"""
X, treatment, y = convert_pd_to_np(X, treatment, y)
- self.cevae = CEVAEModel(outcome_dist=self.outcome_dist,
- feature_dim=X.shape[-1],
- latent_dim=self.latent_dim,
- hidden_dim=self.hidden_dim,
- num_layers=self.num_layers)
-
- self.cevae.fit(x=torch.tensor(X, dtype=torch.float),
- t=torch.tensor(treatment, dtype=torch.float),
- y=torch.tensor(y, dtype=torch.float),
- num_epochs=self.num_epochs,
- batch_size=self.batch_size,
- learning_rate=self.learning_rate,
- learning_rate_decay=self.learning_rate_decay,
- weight_decay=self.weight_decay)
+ self.cevae = CEVAEModel(
+ outcome_dist=self.outcome_dist,
+ feature_dim=X.shape[-1],
+ latent_dim=self.latent_dim,
+ hidden_dim=self.hidden_dim,
+ num_layers=self.num_layers,
+ )
+
+ self.cevae.fit(
+ x=torch.tensor(X, dtype=torch.float),
+ t=torch.tensor(treatment, dtype=torch.float),
+ y=torch.tensor(y, dtype=torch.float),
+ num_epochs=self.num_epochs,
+ batch_size=self.batch_size,
+ learning_rate=self.learning_rate,
+ learning_rate_decay=self.learning_rate_decay,
+ weight_decay=self.weight_decay,
+ )
def predict(self, X, treatment=None, y=None, p=None):
"""
@@ -102,9 +117,15 @@ def predict(self, X, treatment=None, y=None, p=None):
Returns:
(np.ndarray): Predictions of treatment effects.
"""
- return self.cevae.ite(torch.tensor(X, dtype=torch.float),
- num_samples=self.num_samples,
- batch_size=self.batch_size).cpu().numpy()
+ return (
+ self.cevae.ite(
+ torch.tensor(X, dtype=torch.float),
+ num_samples=self.num_samples,
+ batch_size=self.batch_size,
+ )
+ .cpu()
+ .numpy()
+ )
def fit_predict(self, X, treatment, y, p=None):
"""
diff --git a/causalml/inference/tf/dragonnet.py b/causalml/inference/tf/dragonnet.py
index 4ff55899..95da490c 100644
--- a/causalml/inference/tf/dragonnet.py
+++ b/causalml/inference/tf/dragonnet.py
@@ -23,15 +23,31 @@
from tensorflow.keras.regularizers import l2
from causalml.inference.tf.utils import (
- dragonnet_loss_binarycross, EpsilonLayer, regression_loss, binary_classification_loss,
- treatment_accuracy, track_epsilon, make_tarreg_loss)
+ dragonnet_loss_binarycross,
+ EpsilonLayer,
+ regression_loss,
+ binary_classification_loss,
+ treatment_accuracy,
+ track_epsilon,
+ make_tarreg_loss,
+)
from causalml.inference.meta.utils import convert_pd_to_np
class DragonNet:
- def __init__(self, neurons_per_layer=200, targeted_reg=True, ratio=1., val_split=0.2,
- batch_size=64, epochs=30, learning_rate=1e-3, reg_l2=0.01, loss_func=dragonnet_loss_binarycross,
- verbose=True):
+ def __init__(
+ self,
+ neurons_per_layer=200,
+ targeted_reg=True,
+ ratio=1.0,
+ val_split=0.2,
+ batch_size=64,
+ epochs=30,
+ learning_rate=1e-3,
+ reg_l2=0.01,
+ loss_func=dragonnet_loss_binarycross,
+ verbose=True,
+ ):
"""
Initializes a Dragonnet.
"""
@@ -55,44 +71,70 @@ def make_dragonnet(self, input_dim):
Returns:
model (keras.models.Model): DragonNet model
"""
- inputs = Input(shape=(input_dim,), name='input')
+ inputs = Input(shape=(input_dim,), name="input")
# representation
- x = Dense(units=self.neurons_per_layer, activation='elu', kernel_initializer='RandomNormal')(inputs)
- x = Dense(units=self.neurons_per_layer, activation='elu', kernel_initializer='RandomNormal')(x)
- x = Dense(units=self.neurons_per_layer, activation='elu', kernel_initializer='RandomNormal')(x)
-
- t_predictions = Dense(units=1, activation='sigmoid')(x)
+ x = Dense(
+ units=self.neurons_per_layer,
+ activation="elu",
+ kernel_initializer="RandomNormal",
+ )(inputs)
+ x = Dense(
+ units=self.neurons_per_layer,
+ activation="elu",
+ kernel_initializer="RandomNormal",
+ )(x)
+ x = Dense(
+ units=self.neurons_per_layer,
+ activation="elu",
+ kernel_initializer="RandomNormal",
+ )(x)
+
+ t_predictions = Dense(units=1, activation="sigmoid")(x)
# HYPOTHESIS
- y0_hidden = Dense(units=int(self.neurons_per_layer / 2),
- activation='elu',
- kernel_regularizer=l2(self.reg_l2))(x)
- y1_hidden = Dense(units=int(self.neurons_per_layer/2),
- activation='elu',
- kernel_regularizer=l2(self.reg_l2))(x)
+ y0_hidden = Dense(
+ units=int(self.neurons_per_layer / 2),
+ activation="elu",
+ kernel_regularizer=l2(self.reg_l2),
+ )(x)
+ y1_hidden = Dense(
+ units=int(self.neurons_per_layer / 2),
+ activation="elu",
+ kernel_regularizer=l2(self.reg_l2),
+ )(x)
# second layer
- y0_hidden = Dense(units=int(self.neurons_per_layer/2),
- activation='elu',
- kernel_regularizer=l2(self.reg_l2))(y0_hidden)
- y1_hidden = Dense(units=int(self.neurons_per_layer / 2),
- activation='elu',
- kernel_regularizer=l2(self.reg_l2))(y1_hidden)
+ y0_hidden = Dense(
+ units=int(self.neurons_per_layer / 2),
+ activation="elu",
+ kernel_regularizer=l2(self.reg_l2),
+ )(y0_hidden)
+ y1_hidden = Dense(
+ units=int(self.neurons_per_layer / 2),
+ activation="elu",
+ kernel_regularizer=l2(self.reg_l2),
+ )(y1_hidden)
# third
- y0_predictions = Dense(units=1,
- activation=None,
- kernel_regularizer=l2(self.reg_l2),
- name='y0_predictions')(y0_hidden)
- y1_predictions = Dense(units=1,
- activation=None,
- kernel_regularizer=l2(self.reg_l2),
- name='y1_predictions')(y1_hidden)
+ y0_predictions = Dense(
+ units=1,
+ activation=None,
+ kernel_regularizer=l2(self.reg_l2),
+ name="y0_predictions",
+ )(y0_hidden)
+ y1_predictions = Dense(
+ units=1,
+ activation=None,
+ kernel_regularizer=l2(self.reg_l2),
+ name="y1_predictions",
+ )(y1_hidden)
dl = EpsilonLayer()
- epsilons = dl(t_predictions, name='epsilon')
- concat_pred = Concatenate(1)([y0_predictions, y1_predictions, t_predictions, epsilons])
+ epsilons = dl(t_predictions, name="epsilon")
+ concat_pred = Concatenate(1)(
+ [y0_predictions, y1_predictions, t_predictions, epsilons]
+ )
model = Model(inputs=inputs, outputs=concat_pred)
return model
@@ -112,7 +154,12 @@ def fit(self, X, treatment, y, p=None):
self.dragonnet = self.make_dragonnet(X.shape[1])
- metrics = [regression_loss, binary_classification_loss, treatment_accuracy, track_epsilon]
+ metrics = [
+ regression_loss,
+ binary_classification_loss,
+ treatment_accuracy,
+ track_epsilon,
+ ]
if self.targeted_reg:
loss = make_tarreg_loss(ratio=self.ratio, dragonnet_loss=self.loss_func)
@@ -120,40 +167,65 @@ def fit(self, X, treatment, y, p=None):
loss = self.loss_func
self.dragonnet.compile(
- optimizer=Adam(lr=self.learning_rate),
- loss=loss, metrics=metrics)
+ optimizer=Adam(lr=self.learning_rate), loss=loss, metrics=metrics
+ )
adam_callbacks = [
TerminateOnNaN(),
- EarlyStopping(monitor='val_loss', patience=2, min_delta=0.),
- ReduceLROnPlateau(monitor='loss', factor=0.5, patience=5, verbose=self.verbose, mode='auto',
- min_delta=1e-8, cooldown=0, min_lr=0)
-
+ EarlyStopping(monitor="val_loss", patience=2, min_delta=0.0),
+ ReduceLROnPlateau(
+ monitor="loss",
+ factor=0.5,
+ patience=5,
+ verbose=self.verbose,
+ mode="auto",
+ min_delta=1e-8,
+ cooldown=0,
+ min_lr=0,
+ ),
]
- self.dragonnet.fit(X, y,
- callbacks=adam_callbacks,
- validation_split=self.val_split,
- epochs=self.epochs,
- batch_size=self.batch_size,
- verbose=self.verbose)
+ self.dragonnet.fit(
+ X,
+ y,
+ callbacks=adam_callbacks,
+ validation_split=self.val_split,
+ epochs=self.epochs,
+ batch_size=self.batch_size,
+ verbose=self.verbose,
+ )
sgd_callbacks = [
TerminateOnNaN(),
- EarlyStopping(monitor='val_loss', patience=40, min_delta=0.),
- ReduceLROnPlateau(monitor='loss', factor=0.5, patience=5, verbose=self.verbose, mode='auto',
- min_delta=0., cooldown=0, min_lr=0)
+ EarlyStopping(monitor="val_loss", patience=40, min_delta=0.0),
+ ReduceLROnPlateau(
+ monitor="loss",
+ factor=0.5,
+ patience=5,
+ verbose=self.verbose,
+ mode="auto",
+ min_delta=0.0,
+ cooldown=0,
+ min_lr=0,
+ ),
]
sgd_lr = 1e-5
momentum = 0.9
- self.dragonnet.compile(optimizer=SGD(lr=sgd_lr, momentum=momentum, nesterov=True), loss=loss, metrics=metrics)
- self.dragonnet.fit(X, y,
- callbacks=sgd_callbacks,
- validation_split=self.val_split,
- epochs=300,
- batch_size=self.batch_size,
- verbose=self.verbose)
+ self.dragonnet.compile(
+ optimizer=SGD(lr=sgd_lr, momentum=momentum, nesterov=True),
+ loss=loss,
+ metrics=metrics,
+ )
+ self.dragonnet.fit(
+ X,
+ y,
+ callbacks=sgd_callbacks,
+ validation_split=self.val_split,
+ epochs=300,
+ batch_size=self.batch_size,
+ verbose=self.verbose,
+ )
def predict(self, X, treatment=None, y=None, p=None):
"""
diff --git a/causalml/inference/tf/utils.py b/causalml/inference/tf/utils.py
index 8b65655d..050adfd6 100644
--- a/causalml/inference/tf/utils.py
+++ b/causalml/inference/tf/utils.py
@@ -42,7 +42,7 @@ def regression_loss(concat_true, concat_pred):
y0_pred = concat_pred[:, 0]
y1_pred = concat_pred[:, 1]
- loss0 = tf.reduce_sum((1. - t_true) * tf.square(y_true - y0_pred))
+ loss0 = tf.reduce_sum((1.0 - t_true) * tf.square(y_true - y0_pred))
loss1 = tf.reduce_sum(t_true * tf.square(y_true - y1_pred))
return loss0 + loss1
@@ -60,7 +60,9 @@ def dragonnet_loss_binarycross(concat_true, concat_pred):
Returns:
- (float): aggregated regression + classification loss
"""
- return regression_loss(concat_true, concat_pred) + binary_classification_loss(concat_true, concat_pred)
+ return regression_loss(concat_true, concat_pred) + binary_classification_loss(
+ concat_true, concat_pred
+ )
def treatment_accuracy(concat_true, concat_pred):
@@ -96,7 +98,7 @@ def track_epsilon(concat_true, concat_pred):
return tf.abs(tf.reduce_mean(epsilons))
-def make_tarreg_loss(ratio=1., dragonnet_loss=dragonnet_loss_binarycross):
+def make_tarreg_loss(ratio=1.0, dragonnet_loss=dragonnet_loss_binarycross):
"""
Given a specified loss function, returns the same loss function with targeted regularization.
@@ -106,6 +108,7 @@ def make_tarreg_loss(ratio=1., dragonnet_loss=dragonnet_loss_binarycross):
Returns:
(function): loss function with targeted regularization, weighted by specified ratio
"""
+
def tarreg_ATE_unbounded_domain_loss(concat_true, concat_pred):
"""
Returns the loss function (specified in outer function) with targeted regularization.
@@ -141,6 +144,7 @@ class EpsilonLayer(Layer):
"""
Custom keras layer to allow epsilon to be learned during training process.
"""
+
def __init__(self):
"""
Inherits keras' Layer object.
@@ -151,10 +155,9 @@ def build(self, input_shape):
"""
Creates a trainable weight variable for this layer.
"""
- self.epsilon = self.add_weight(name='epsilon',
- shape=[1, 1],
- initializer='RandomNormal',
- trainable=True)
+ self.epsilon = self.add_weight(
+ name="epsilon", shape=[1, 1], initializer="RandomNormal", trainable=True
+ )
super(EpsilonLayer, self).build(input_shape)
def call(self, inputs, **kwargs):
diff --git a/causalml/inference/tree/__init__.py b/causalml/inference/tree/__init__.py
index 7c3bdd3a..44a09ed0 100644
--- a/causalml/inference/tree/__init__.py
+++ b/causalml/inference/tree/__init__.py
@@ -1,4 +1,10 @@
from .uplift import DecisionTree, UpliftTreeClassifier, UpliftRandomForestClassifier
-from .causaltree import CausalMSE, CausalTreeRegressor
+from .causaltree import CausalMSE, CausalTreeRegressor
from .plot import uplift_tree_string, uplift_tree_plot
-from .utils import cat_group, cat_transform, cv_fold_index, cat_continuous, kpi_transform
+from .utils import (
+ cat_group,
+ cat_transform,
+ cv_fold_index,
+ cat_continuous,
+ kpi_transform,
+)
diff --git a/causalml/inference/tree/plot.py b/causalml/inference/tree/plot.py
index d11efdc3..c075ae56 100644
--- a/causalml/inference/tree/plot.py
+++ b/causalml/inference/tree/plot.py
@@ -9,7 +9,7 @@
def uplift_tree_string(decisionTree, x_names):
- '''
+ """
Convert the tree to string for print.
Args
@@ -24,34 +24,40 @@ def uplift_tree_string(decisionTree, x_names):
Returns
-------
A string representation of the tree.
- '''
+ """
# Column Heading
dcHeadings = {}
- for i, szY in enumerate(x_names + ['treatment_group_key']):
- szCol = 'Column %d' % i
+ for i, szY in enumerate(x_names + ["treatment_group_key"]):
+ szCol = "Column %d" % i
dcHeadings[szCol] = str(szY)
- def toString(decisionTree, indent=''):
+ def toString(decisionTree, indent=""):
if decisionTree.results is not None: # leaf node
return str(decisionTree.results)
else:
- szCol = 'Column %s' % decisionTree.col
+ szCol = "Column %s" % decisionTree.col
if szCol in dcHeadings:
szCol = dcHeadings[szCol]
- if isinstance(decisionTree.value, int) or isinstance(decisionTree.value, float):
- decision = '%s >= %s?' % (szCol, decisionTree.value)
+ if isinstance(decisionTree.value, int) or isinstance(
+ decisionTree.value, float
+ ):
+ decision = "%s >= %s?" % (szCol, decisionTree.value)
else:
- decision = '%s == %s?' % (szCol, decisionTree.value)
- trueBranch = indent + 'yes -> ' + toString(decisionTree.trueBranch, indent + '\t\t')
- falseBranch = indent + 'no -> ' + toString(decisionTree.falseBranch, indent + '\t\t')
- return (decision + '\n' + trueBranch + '\n' + falseBranch)
+ decision = "%s == %s?" % (szCol, decisionTree.value)
+ trueBranch = (
+ indent + "yes -> " + toString(decisionTree.trueBranch, indent + "\t\t")
+ )
+ falseBranch = (
+ indent + "no -> " + toString(decisionTree.falseBranch, indent + "\t\t")
+ )
+ return decision + "\n" + trueBranch + "\n" + falseBranch
print(toString(decisionTree))
def uplift_tree_plot(decisionTree, x_names):
- '''
+ """
Convert the tree to dot graph for plots.
Args
@@ -66,47 +72,93 @@ def uplift_tree_plot(decisionTree, x_names):
Returns
-------
Dot class representing the tree graph.
- '''
+ """
# Column Heading
dcHeadings = {}
- for i, szY in enumerate(x_names + ['treatment_group_key']):
- szCol = 'Column %d' % i
+ for i, szY in enumerate(x_names + ["treatment_group_key"]):
+ szCol = "Column %d" % i
dcHeadings[szCol] = str(szY)
dcNodes = defaultdict(list)
"""Plots the obtained decision tree. """
- def toString(iSplit, decisionTree, bBranch, szParent="null", indent='', indexParent=0, upliftScores=list()):
+ def toString(
+ iSplit,
+ decisionTree,
+ bBranch,
+ szParent="null",
+ indent="",
+ indexParent=0,
+ upliftScores=list(),
+ ):
if decisionTree.results is not None: # leaf node
lsY = []
for tr, p in zip(decisionTree.classes_, decisionTree.results):
- lsY.append(f'{tr}:{p:.2f}')
- dcY = {"name": ', '.join(lsY), "parent": szParent}
+ lsY.append(f"{tr}:{p:.2f}")
+ dcY = {"name": ", ".join(lsY), "parent": szParent}
dcSummary = decisionTree.summary
- upliftScores += [dcSummary['matchScore']]
- dcNodes[iSplit].append(['leaf', dcY['name'], szParent, bBranch,
- str(-round(float(decisionTree.summary['impurity']), 3)), dcSummary['samples'],
- dcSummary['group_size'], dcSummary['upliftScore'], dcSummary['matchScore'],
- indexParent])
+ upliftScores += [dcSummary["matchScore"]]
+ dcNodes[iSplit].append(
+ [
+ "leaf",
+ dcY["name"],
+ szParent,
+ bBranch,
+ str(-round(float(decisionTree.summary["impurity"]), 3)),
+ dcSummary["samples"],
+ dcSummary["group_size"],
+ dcSummary["upliftScore"],
+ dcSummary["matchScore"],
+ indexParent,
+ ]
+ )
else:
- szCol = 'Column %s' % decisionTree.col
+ szCol = "Column %s" % decisionTree.col
if szCol in dcHeadings:
szCol = dcHeadings[szCol]
- if isinstance(decisionTree.value, int) or isinstance(decisionTree.value, float):
- decision = '%s >= %s' % (szCol, decisionTree.value)
+ if isinstance(decisionTree.value, int) or isinstance(
+ decisionTree.value, float
+ ):
+ decision = "%s >= %s" % (szCol, decisionTree.value)
else:
- decision = '%s == %s' % (szCol, decisionTree.value)
+ decision = "%s == %s" % (szCol, decisionTree.value)
indexOfLevel = len(dcNodes[iSplit])
- toString(iSplit + 1, decisionTree.trueBranch, True, decision, indent + '\t\t', indexOfLevel, upliftScores)
- toString(iSplit + 1, decisionTree.falseBranch, False, decision, indent + '\t\t', indexOfLevel, upliftScores)
+ toString(
+ iSplit + 1,
+ decisionTree.trueBranch,
+ True,
+ decision,
+ indent + "\t\t",
+ indexOfLevel,
+ upliftScores,
+ )
+ toString(
+ iSplit + 1,
+ decisionTree.falseBranch,
+ False,
+ decision,
+ indent + "\t\t",
+ indexOfLevel,
+ upliftScores,
+ )
dcSummary = decisionTree.summary
- upliftScores += [dcSummary['matchScore']]
- dcNodes[iSplit].append([iSplit + 1, decision, szParent, bBranch,
- str(-round(float(decisionTree.summary['impurity']), 3)), dcSummary['samples'],
- dcSummary['group_size'], dcSummary['upliftScore'], dcSummary['matchScore'],
- indexParent])
+ upliftScores += [dcSummary["matchScore"]]
+ dcNodes[iSplit].append(
+ [
+ iSplit + 1,
+ decision,
+ szParent,
+ bBranch,
+ str(-round(float(decisionTree.summary["impurity"]), 3)),
+ dcSummary["samples"],
+ dcSummary["group_size"],
+ dcSummary["upliftScore"],
+ dcSummary["matchScore"],
+ indexParent,
+ ]
+ )
upliftScores = list()
toString(0, decisionTree, None, upliftScores=upliftScores)
@@ -116,74 +168,114 @@ def toString(iSplit, decisionTree, bBranch, szParent="null", indent='', indexPar
# calculate colors for nodes based on uplifts
minUplift = min(upliftScores)
maxUplift = max(upliftScores)
- upliftLevels = [(uplift-minUplift)/(maxUplift-minUplift) for uplift in upliftScores] # min max scaler
- baseUplift = float(decisionTree.summary.get('matchScore'))
- baseUpliftLevel = (baseUplift - minUplift) / (maxUplift - minUplift) # min max scaler normalization
- white = np.array([255., 255., 255.])
- blue = np.array([31., 119., 180.])
- green = np.array([0., 128., 0.])
+ upliftLevels = [
+ (uplift - minUplift) / (maxUplift - minUplift) for uplift in upliftScores
+ ] # min max scaler
+ baseUplift = float(decisionTree.summary.get("matchScore"))
+ baseUpliftLevel = (baseUplift - minUplift) / (
+ maxUplift - minUplift
+ ) # min max scaler normalization
+ white = np.array([255.0, 255.0, 255.0])
+ blue = np.array([31.0, 119.0, 180.0])
+ green = np.array([0.0, 128.0, 0.0])
for i, upliftLevel in enumerate(upliftLevels):
if upliftLevel >= baseUpliftLevel: # go blue
color = upliftLevel * blue + (1 - upliftLevel) * white
else: # go green
color = (1 - upliftLevel) * green + upliftLevel * white
color = [int(c) for c in color]
- upliftScoreToColor[upliftScores[i]] = ('#%2x%2x%2x' % tuple(color)).replace(' ', '0') # color code
+ upliftScoreToColor[upliftScores[i]] = ("#%2x%2x%2x" % tuple(color)).replace(
+ " ", "0"
+ ) # color code
except Exception as e:
print(e)
- lsDot = ['digraph Tree {',
- 'node [shape=box, style="filled, rounded", color="black", fontname=helvetica] ;',
- 'edge [fontname=helvetica] ;'
- ]
+ lsDot = [
+ "digraph Tree {",
+ 'node [shape=box, style="filled, rounded", color="black", fontname=helvetica] ;',
+ "edge [fontname=helvetica] ;",
+ ]
i_node = 0
dcParent = {}
- totalSample = int(decisionTree.summary.get('samples')) # initialize the value with the total sample size at root
+ totalSample = int(
+ decisionTree.summary.get("samples")
+ ) # initialize the value with the total sample size at root
for nSplit in range(len(dcNodes.items())):
lsY = dcNodes[nSplit]
indexOfLevel = 0
for lsX in lsY:
- iSplit, decision, szParent, bBranch, szImpurity, szSamples, szGroup, \
- upliftScore, matchScore, indexParent = lsX
+ (
+ iSplit,
+ decision,
+ szParent,
+ bBranch,
+ szImpurity,
+ szSamples,
+ szGroup,
+ upliftScore,
+ matchScore,
+ indexParent,
+ ) = lsX
- sampleProportion = round(int(szSamples)*100./totalSample, 1)
+ sampleProportion = round(int(szSamples) * 100.0 / totalSample, 1)
if type(iSplit) is int:
- szSplit = '%d-%d' % (iSplit, indexOfLevel)
+ szSplit = "%d-%d" % (iSplit, indexOfLevel)
dcParent[szSplit] = i_node
- lsDot.append('%d [label=<%s<br/> impurity %s<br/> total_sample %s (%s&#37;)<br/> group_sample %s<br/>'
- 'uplift score: %s<br/> uplift p_value %s<br/>'
- 'validation uplift score %s>, fillcolor="%s"] ;' % (
- i_node, decision.replace('>=', '&ge;').replace('?', ''), szImpurity, szSamples,
- str(sampleProportion), szGroup, str(upliftScore[0]), str(upliftScore[1]),
- str(matchScore), upliftScoreToColor.get(matchScore, '#e5813900')
- ))
+ lsDot.append(
+ "%d [label=<%s<br/> impurity %s<br/> total_sample %s (%s&#37;)<br/> group_sample %s<br/>"
+ "uplift score: %s<br/> uplift p_value %s<br/>"
+ 'validation uplift score %s>, fillcolor="%s"] ;'
+ % (
+ i_node,
+ decision.replace(">=", "&ge;").replace("?", ""),
+ szImpurity,
+ szSamples,
+ str(sampleProportion),
+ szGroup,
+ str(upliftScore[0]),
+ str(upliftScore[1]),
+ str(matchScore),
+ upliftScoreToColor.get(matchScore, "#e5813900"),
+ )
+ )
else:
- lsDot.append('%d [label=< impurity %s<br/> total_sample %s (%s&#37;)<br/> group_sample %s<br/>'
- 'uplift score: %s<br/> uplift p_value %s<br/> validation uplift score %s<br/>'
- 'mean %s>, fillcolor="%s"] ;' % (
- i_node, szImpurity, szSamples, str(sampleProportion), szGroup, str(upliftScore[0]),
- str(upliftScore[1]), str(matchScore), decision,
- upliftScoreToColor.get(matchScore, '#e5813900')
- ))
-
- if szParent != 'null':
+ lsDot.append(
+ "%d [label=< impurity %s
total_sample %s (%s%)
group_sample %s
"
+ "uplift score: %s
uplift p_value %s
validation uplift score %s
"
+ 'mean %s>, fillcolor="%s"] ;'
+ % (
+ i_node,
+ szImpurity,
+ szSamples,
+ str(sampleProportion),
+ szGroup,
+ str(upliftScore[0]),
+ str(upliftScore[1]),
+ str(matchScore),
+ decision,
+ upliftScoreToColor.get(matchScore, "#e5813900"),
+ )
+ )
+
+ if szParent != "null":
if bBranch:
- szAngle = '45'
- szHeadLabel = 'True'
+ szAngle = "45"
+ szHeadLabel = "True"
else:
- szAngle = '-45'
- szHeadLabel = 'False'
- szSplit = '%d-%d' % (nSplit, indexParent)
+ szAngle = "-45"
+ szHeadLabel = "False"
+ szSplit = "%d-%d" % (nSplit, indexParent)
p_node = dcParent[szSplit]
if nSplit == 1:
- lsDot.append('%d -> %d [labeldistance=2.5, labelangle=%s, headlabel="%s"] ;' % (p_node,
- i_node, szAngle,
- szHeadLabel))
+ lsDot.append(
+ '%d -> %d [labeldistance=2.5, labelangle=%s, headlabel="%s"] ;'
+ % (p_node, i_node, szAngle, szHeadLabel)
+ )
else:
- lsDot.append('%d -> %d ;' % (p_node, i_node))
+ lsDot.append("%d -> %d ;" % (p_node, i_node))
i_node += 1
indexOfLevel += 1
- lsDot.append('}')
- dot_data = '\n'.join(lsDot)
+ lsDot.append("}")
+ dot_data = "\n".join(lsDot)
graph = pydotplus.graph_from_dot_data(dot_data)
return graph
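
For reference, the node-coloring arithmetic reformatted above reads more easily in isolation. Below is a minimal standalone sketch, not part of the diff and with illustrative names: each uplift score is min-max scaled, and the node fill is blended toward blue when above the root's match score and toward green when below it.

```python
import numpy as np

# Illustrative sketch (not from the diff): reproduce the node-coloring arithmetic.
def uplift_to_hex(uplift, min_uplift, max_uplift, base_uplift):
    white = np.array([255.0, 255.0, 255.0])
    blue = np.array([31.0, 119.0, 180.0])
    green = np.array([0.0, 128.0, 0.0])
    # min-max scale the node's score and the root's ("match") score
    level = (uplift - min_uplift) / (max_uplift - min_uplift)
    base_level = (base_uplift - min_uplift) / (max_uplift - min_uplift)
    if level >= base_level:  # above the root: blend white -> blue
        color = level * blue + (1 - level) * white
    else:  # below the root: blend green -> white
        color = (1 - level) * green + level * white
    # '%2x' pads to width 2 with spaces; swapping spaces for zeros gives a valid hex code
    return ("#%2x%2x%2x" % tuple(int(c) for c in color)).replace(" ", "0")

print(uplift_to_hex(0.08, 0.0, 0.1, 0.05))  # '#4b92c3', a blue shade
```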
diff --git a/causalml/inference/tree/utils.py b/causalml/inference/tree/utils.py
index 73b29ec3..ef60732e 100644
--- a/causalml/inference/tree/utils.py
+++ b/causalml/inference/tree/utils.py
@@ -7,7 +7,7 @@
def cat_group(dfx, kpix, n_group=10):
- '''
+ """
Category Reduction for Categorical Variables
Args
@@ -25,7 +25,7 @@ def cat_group(dfx, kpix, n_group=10):
Returns
-------
The transformed categorical feature value list.
- '''
+ """
if dfx[kpix].nunique() > n_group:
# get the top categories
top = dfx[kpix].isin(dfx[kpix].value_counts().index[:n_group])
@@ -36,7 +36,7 @@ def cat_group(dfx, kpix, n_group=10):
def cat_transform(dfx, kpix, kpi1):
- '''
+ """
Encoding string features.
Args
@@ -58,9 +58,9 @@ def cat_transform(dfx, kpix, kpi1):
kpi1 : list
The updated feature names containing the new dummy feature names.
- '''
+ """
df_dummy = pd.get_dummies(dfx[kpix].values)
- new_col_names = ['%s_%s' % (kpix, x) for x in df_dummy.columns]
+ new_col_names = ["%s_%s" % (kpix, x) for x in df_dummy.columns]
df_dummy.columns = new_col_names
dfx = pd.concat([dfx, df_dummy], axis=1)
for new_col in new_col_names:
@@ -72,7 +72,7 @@ def cat_transform(dfx, kpix, kpi1):
def cv_fold_index(n, i, k, random_seed=2018):
- '''
+ """
Encoding string features.
Args
@@ -94,7 +94,7 @@ def cv_fold_index(n, i, k, random_seed=2018):
kpi1 : list
The updated feature names containing the new dummy feature names.
- '''
+ """
np.random.seed(random_seed)
rlist = np.random.choice(a=range(k), size=n, replace=True)
fold_i_index = np.where(rlist == i)[0]
@@ -102,8 +102,8 @@ def cv_fold_index(n, i, k, random_seed=2018):
# Categorize continuous variable
-def cat_continuous(x, granularity='Medium'):
- '''
+def cat_continuous(x, granularity="Medium"):
+ """
Categorize (bin) continuous variable based on percentile.
Args
@@ -119,72 +119,109 @@ def cat_continuous(x, granularity='Medium'):
-------
res : list
List of percentile bins for the feature value.
- '''
- if granularity == 'High':
- lspercentile = [np.percentile(x, 5),
- np.percentile(x, 10),
- np.percentile(x, 15),
- np.percentile(x, 20),
- np.percentile(x, 25),
- np.percentile(x, 30),
- np.percentile(x, 35),
- np.percentile(x, 40),
- np.percentile(x, 45),
- np.percentile(x, 50),
- np.percentile(x, 55),
- np.percentile(x, 60),
- np.percentile(x, 65),
- np.percentile(x, 70),
- np.percentile(x, 75),
- np.percentile(x, 80),
- np.percentile(x, 85),
- np.percentile(x, 90),
- np.percentile(x, 95),
- np.percentile(x, 99)
- ]
- res = ['> p90 (%s)' % (lspercentile[8]) if z > lspercentile[8] else
- '<= p10 (%s)' % (lspercentile[0]) if z <= lspercentile[0] else
- '<= p20 (%s)' % (lspercentile[1]) if z <= lspercentile[1] else
- '<= p30 (%s)' % (lspercentile[2]) if z <= lspercentile[2] else
- '<= p40 (%s)' % (lspercentile[3]) if z <= lspercentile[3] else
- '<= p50 (%s)' % (lspercentile[4]) if z <= lspercentile[4] else
- '<= p60 (%s)' % (lspercentile[5]) if z <= lspercentile[5] else
- '<= p70 (%s)' % (lspercentile[6]) if z <= lspercentile[6] else
- '<= p80 (%s)' % (lspercentile[7]) if z <= lspercentile[7] else
- '<= p90 (%s)' % (lspercentile[8]) if z <= lspercentile[8] else
- '> p90 (%s)' % (lspercentile[8]) for z in x]
- elif granularity == 'Medium':
- lspercentile = [np.percentile(x, 10),
- np.percentile(x, 20),
- np.percentile(x, 30),
- np.percentile(x, 40),
- np.percentile(x, 50),
- np.percentile(x, 60),
- np.percentile(x, 70),
- np.percentile(x, 80),
- np.percentile(x, 90)
- ]
- res = ['<= p10 (%s)' % (lspercentile[0]) if z <= lspercentile[0] else
- '<= p20 (%s)' % (lspercentile[1]) if z <= lspercentile[1] else
- '<= p30 (%s)' % (lspercentile[2]) if z <= lspercentile[2] else
- '<= p40 (%s)' % (lspercentile[3]) if z <= lspercentile[3] else
- '<= p50 (%s)' % (lspercentile[4]) if z <= lspercentile[4] else
- '<= p60 (%s)' % (lspercentile[5]) if z <= lspercentile[5] else
- '<= p70 (%s)' % (lspercentile[6]) if z <= lspercentile[6] else
- '<= p80 (%s)' % (lspercentile[7]) if z <= lspercentile[7] else
- '<= p90 (%s)' % (lspercentile[8]) if z <= lspercentile[8] else
- '> p90 (%s)' % (lspercentile[8]) for z in x]
+ """
+ if granularity == "High":
+ lspercentile = [
+ np.percentile(x, 5),
+ np.percentile(x, 10),
+ np.percentile(x, 15),
+ np.percentile(x, 20),
+ np.percentile(x, 25),
+ np.percentile(x, 30),
+ np.percentile(x, 35),
+ np.percentile(x, 40),
+ np.percentile(x, 45),
+ np.percentile(x, 50),
+ np.percentile(x, 55),
+ np.percentile(x, 60),
+ np.percentile(x, 65),
+ np.percentile(x, 70),
+ np.percentile(x, 75),
+ np.percentile(x, 80),
+ np.percentile(x, 85),
+ np.percentile(x, 90),
+ np.percentile(x, 95),
+ np.percentile(x, 99),
+ ]
+ res = [
+ "> p90 (%s)" % (lspercentile[8])
+ if z > lspercentile[8]
+ else "<= p10 (%s)" % (lspercentile[0])
+ if z <= lspercentile[0]
+ else "<= p20 (%s)" % (lspercentile[1])
+ if z <= lspercentile[1]
+ else "<= p30 (%s)" % (lspercentile[2])
+ if z <= lspercentile[2]
+ else "<= p40 (%s)" % (lspercentile[3])
+ if z <= lspercentile[3]
+ else "<= p50 (%s)" % (lspercentile[4])
+ if z <= lspercentile[4]
+ else "<= p60 (%s)" % (lspercentile[5])
+ if z <= lspercentile[5]
+ else "<= p70 (%s)" % (lspercentile[6])
+ if z <= lspercentile[6]
+ else "<= p80 (%s)" % (lspercentile[7])
+ if z <= lspercentile[7]
+ else "<= p90 (%s)" % (lspercentile[8])
+ if z <= lspercentile[8]
+ else "> p90 (%s)" % (lspercentile[8])
+ for z in x
+ ]
+ elif granularity == "Medium":
+ lspercentile = [
+ np.percentile(x, 10),
+ np.percentile(x, 20),
+ np.percentile(x, 30),
+ np.percentile(x, 40),
+ np.percentile(x, 50),
+ np.percentile(x, 60),
+ np.percentile(x, 70),
+ np.percentile(x, 80),
+ np.percentile(x, 90),
+ ]
+ res = [
+ "<= p10 (%s)" % (lspercentile[0])
+ if z <= lspercentile[0]
+ else "<= p20 (%s)" % (lspercentile[1])
+ if z <= lspercentile[1]
+ else "<= p30 (%s)" % (lspercentile[2])
+ if z <= lspercentile[2]
+ else "<= p40 (%s)" % (lspercentile[3])
+ if z <= lspercentile[3]
+ else "<= p50 (%s)" % (lspercentile[4])
+ if z <= lspercentile[4]
+ else "<= p60 (%s)" % (lspercentile[5])
+ if z <= lspercentile[5]
+ else "<= p70 (%s)" % (lspercentile[6])
+ if z <= lspercentile[6]
+ else "<= p80 (%s)" % (lspercentile[7])
+ if z <= lspercentile[7]
+ else "<= p90 (%s)" % (lspercentile[8])
+ if z <= lspercentile[8]
+ else "> p90 (%s)" % (lspercentile[8])
+ for z in x
+ ]
else:
- lspercentile = [np.percentile(x, 15), np.percentile(x, 50), np.percentile(x, 85)]
- res = ['1-Very Low' if z < lspercentile[0] else
- '2-Low' if z < lspercentile[1] else
- '3-High' if z < lspercentile[2] else
- '4-Very High' for z in x]
+ lspercentile = [
+ np.percentile(x, 15),
+ np.percentile(x, 50),
+ np.percentile(x, 85),
+ ]
+ res = [
+ "1-Very Low"
+ if z < lspercentile[0]
+ else "2-Low"
+ if z < lspercentile[1]
+ else "3-High"
+ if z < lspercentile[2]
+ else "4-Very High"
+ for z in x
+ ]
return res
def kpi_transform(dfx, kpi_combo, kpi_combo_new):
- '''
+ """
Feature transformation from continuous feature to binned features for a list of features
Args
@@ -203,7 +240,7 @@ def kpi_transform(dfx, kpi_combo, kpi_combo_new):
-------
dfx : DataFrame
Updated DataFrame containing the new features.
- '''
+ """
for j in range(len(kpi_combo)):
if type(dfx[kpi_combo[j]].values[0]) is str:
dfx[kpi_combo_new[j]] = dfx[kpi_combo[j]].values
@@ -211,10 +248,10 @@ def kpi_transform(dfx, kpi_combo, kpi_combo_new):
else:
if len(kpi_combo) > 1:
dfx[kpi_combo_new[j]] = cat_continuous(
- dfx[kpi_combo[j]].values, granularity='Low'
+ dfx[kpi_combo[j]].values, granularity="Low"
)
else:
dfx[kpi_combo_new[j]] = cat_continuous(
- dfx[kpi_combo[j]].values, granularity='High'
+ dfx[kpi_combo[j]].values, granularity="High"
)
return dfx
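
For reference, a minimal sketch of the percentile binning that `cat_continuous()` performs at `"Low"` granularity, with illustrative names and synthetic data (not part of the diff): each value is labeled by the p15/p50/p85 band it falls into.

```python
import numpy as np

# Illustrative sketch (not from the diff) of the 'Low' granularity branch of
# cat_continuous(): label each value by the p15 / p50 / p85 band it falls into.
def bin_by_percentile_low(x):
    p15, p50, p85 = (np.percentile(x, q) for q in (15, 50, 85))
    return [
        "1-Very Low" if z < p15
        else "2-Low" if z < p50
        else "3-High" if z < p85
        else "4-Very High"
        for z in x
    ]

values = np.random.RandomState(0).normal(loc=50, scale=10, size=1000)
print(bin_by_percentile_low(values)[:5])
```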
diff --git a/causalml/match.py b/causalml/match.py
index 2f9d309d..2ca55023 100644
--- a/causalml/match.py
+++ b/causalml/match.py
@@ -8,7 +8,7 @@
from sklearn.preprocessing import StandardScaler
from sklearn.utils import check_random_state
-logger = logging.getLogger('causalml')
+logger = logging.getLogger("causalml")
def smd(feature, treatment):
@@ -28,7 +28,7 @@ def smd(feature, treatment):
"""
t = feature[treatment == 1]
c = feature[treatment == 0]
- return (t.mean() - c.mean()) / np.sqrt(.5 * (t.var() + c.var()))
+ return (t.mean() - c.mean()) / np.sqrt(0.5 * (t.var() + c.var()))
def create_table_one(data, treatment_col, features):
@@ -48,26 +48,25 @@ def create_table_one(data, treatment_col, features):
the treatment and control groups, and the SMD between two groups
for the features.
"""
- t1 = pd.pivot_table(data[features + [treatment_col]],
- columns=treatment_col,
- aggfunc=[lambda x: '{:.2f} ({:.2f})'.format(x.mean(),
- x.std())])
+ t1 = pd.pivot_table(
+ data[features + [treatment_col]],
+ columns=treatment_col,
+ aggfunc=[lambda x: "{:.2f} ({:.2f})".format(x.mean(), x.std())],
+ )
t1.columns = t1.columns.droplevel(level=0)
- t1['SMD'] = data[features].apply(
- lambda x: smd(x, data[treatment_col])
- ).round(4)
+ t1["SMD"] = data[features].apply(lambda x: smd(x, data[treatment_col])).round(4)
- n_row = pd.pivot_table(data[[features[0], treatment_col]],
- columns=treatment_col,
- aggfunc=['count'])
+ n_row = pd.pivot_table(
+ data[[features[0], treatment_col]], columns=treatment_col, aggfunc=["count"]
+ )
n_row.columns = n_row.columns.droplevel(level=0)
- n_row['SMD'] = ''
- n_row.index = ['n']
+ n_row["SMD"] = ""
+ n_row.index = ["n"]
t1 = pd.concat([n_row, t1], axis=0)
- t1.columns.name = ''
- t1.columns = ['Control', 'Treatment', 'SMD']
- t1.index.name = 'Variable'
+ t1.columns.name = ""
+ t1.columns = ["Control", "Treatment", "SMD"]
+ t1.index.name = "Variable"
return t1
@@ -89,8 +88,15 @@ class NearestNeighborMatch(object):
None means 1 unless in a joblib.parallel_backend context. -1 means using all processors
"""
- def __init__(self, caliper=.2, replace=False, ratio=1, shuffle=True,
- random_state=None, n_jobs=-1):
+ def __init__(
+ self,
+ caliper=0.2,
+ replace=False,
+ ratio=1,
+ shuffle=True,
+ random_state=None,
+ n_jobs=-1,
+ ):
"""Initialize a propensity score matching model.
Args:
@@ -124,7 +130,7 @@ def match(self, data, treatment_col, score_cols):
(pandas.DataFrame): The subset of data consisting of matched
treatment and control group data.
"""
- assert type(score_cols) is list, 'score_cols must be a list'
+ assert type(score_cols) is list, "score_cols must be a list"
treatment = data.loc[data[treatment_col] == 1, score_cols]
control = data.loc[data[treatment_col] == 0, score_cols]
@@ -133,15 +139,19 @@ def match(self, data, treatment_col, score_cols):
if self.replace:
scaler = StandardScaler()
scaler.fit(data[score_cols])
- treatment_scaled = pd.DataFrame(scaler.transform(treatment),
- index=treatment.index)
- control_scaled = pd.DataFrame(scaler.transform(control),
- index=control.index)
+ treatment_scaled = pd.DataFrame(
+ scaler.transform(treatment), index=treatment.index
+ )
+ control_scaled = pd.DataFrame(
+ scaler.transform(control), index=control.index
+ )
# SD is the same as caliper because we use a StandardScaler above
sdcal = self.caliper
- matching_model = NearestNeighbors(n_neighbors=self.ratio, n_jobs=self.n_jobs)
+ matching_model = NearestNeighbors(
+ n_neighbors=self.ratio, n_jobs=self.n_jobs
+ )
matching_model.fit(control_scaled)
distances, indices = matching_model.kneighbors(treatment_scaled)
@@ -150,19 +160,18 @@ def match(self, data, treatment_col, score_cols):
# the (n_obs * self.ratio, 1) matrices and data frame.
distances = distances.T.flatten()
indices = indices.T.flatten()
- treatment_scaled = pd.concat([treatment_scaled] * self.ratio,
- axis=0)
+ treatment_scaled = pd.concat([treatment_scaled] * self.ratio, axis=0)
- cond = (distances / np.sqrt(len(score_cols)) ) < sdcal
+ cond = (distances / np.sqrt(len(score_cols))) < sdcal
# Deduplicate the indices of the treatment group
t_idx_matched = np.unique(treatment_scaled.loc[cond].index)
# XXX: Should we deduplicate the indices of the control group too?
c_idx_matched = np.array(control_scaled.iloc[indices[cond]].index)
else:
assert len(score_cols) == 1, (
- 'Matching on multiple columns is only supported using the '
- 'replacement method (if matching on multiple columns, set '
- 'replace=True).'
+ "Matching on multiple columns is only supported using the "
+ "replacement method (if matching on multiple columns, set "
+ "replace=True)."
)
# unpack score_cols for the single-variable matching case
score_col = score_cols[0]
@@ -174,19 +183,22 @@ def match(self, data, treatment_col, score_cols):
t_idx_matched = []
c_idx_matched = []
- control['unmatched'] = True
+ control["unmatched"] = True
for t_idx in t_indices:
- dist = np.abs(control.loc[control.unmatched, score_col]
- - treatment.loc[t_idx, score_col])
+ dist = np.abs(
+ control.loc[control.unmatched, score_col]
+ - treatment.loc[t_idx, score_col]
+ )
c_idx_min = dist.idxmin()
if dist[c_idx_min] <= sdcal:
t_idx_matched.append(t_idx)
c_idx_matched.append(c_idx_min)
- control.loc[c_idx_min, 'unmatched'] = False
+ control.loc[c_idx_min, "unmatched"] = False
- return data.loc[np.concatenate([np.array(t_idx_matched),
- np.array(c_idx_matched)])]
+ return data.loc[
+ np.concatenate([np.array(t_idx_matched), np.array(c_idx_matched)])
+ ]
def match_by_group(self, data, treatment_col, score_cols, groupby_col):
"""Find matches from the control group stratified by groupby_col, by
@@ -204,20 +216,31 @@ def match_by_group(self, data, treatment_col, score_cols, groupby_col):
treatment and control group data.
"""
matched = data.groupby(groupby_col).apply(
- lambda x: self.match(data=x, treatment_col=treatment_col,
- score_cols=score_cols)
+ lambda x: self.match(
+ data=x, treatment_col=treatment_col, score_cols=score_cols
+ )
)
return matched.reset_index(level=0, drop=True)
class MatchOptimizer(object):
- def __init__(self, treatment_col='is_treatment', ps_col='pihat',
- user_col=None, matching_covariates=['pihat'], max_smd=0.1,
- max_deviation=0.1, caliper_range=(0.01, 0.5),
- max_pihat_range=(0.95, 0.999), max_iter_per_param=5,
- min_users_per_group=1000, smd_cols=['pihat'],
- dev_cols_transformations={'pihat': np.mean},
- dev_factor=1., verbose=True):
+ def __init__(
+ self,
+ treatment_col="is_treatment",
+ ps_col="pihat",
+ user_col=None,
+ matching_covariates=["pihat"],
+ max_smd=0.1,
+ max_deviation=0.1,
+ caliper_range=(0.01, 0.5),
+ max_pihat_range=(0.95, 0.999),
+ max_iter_per_param=5,
+ min_users_per_group=1000,
+ smd_cols=["pihat"],
+ dev_cols_transformations={"pihat": np.mean},
+ dev_factor=1.0,
+ verbose=True,
+ ):
"""Finds the set of parameters that gives the best matching result.
Score = (number of features with SMD > max_smd)
@@ -262,17 +285,15 @@ def __init__(self, treatment_col='is_treatment', ps_col='pihat',
self.matching_covariates = matching_covariates
self.max_smd = max_smd
self.max_deviation = max_deviation
- self.caliper_range = np.linspace(*caliper_range,
- num=max_iter_per_param)
- self.max_pihat_range = np.linspace(*max_pihat_range,
- num=max_iter_per_param)
+ self.caliper_range = np.linspace(*caliper_range, num=max_iter_per_param)
+ self.max_pihat_range = np.linspace(*max_pihat_range, num=max_iter_per_param)
self.max_iter_per_param = max_iter_per_param
self.min_users_per_group = min_users_per_group
self.smd_cols = smd_cols
self.dev_factor = dev_factor
self.dev_cols_transformations = dev_cols_transformations
self.best_params = {}
- self.best_score = 1e7 # ideal score is 0
+ self.best_score = 1e7 # ideal score is 0
self.verbose = verbose
self.pass_all = False
@@ -280,46 +301,79 @@ def single_match(self, score_cols, pihat_threshold, caliper):
matcher = NearestNeighborMatch(caliper=caliper, replace=True)
df_matched = matcher.match(
data=self.df[self.df[self.ps_col] < pihat_threshold],
- treatment_col=self.treatment_col, score_cols=score_cols
+ treatment_col=self.treatment_col,
+ score_cols=score_cols,
)
return df_matched
- def check_table_one(self, tableone, matched, score_cols, pihat_threshold,
- caliper):
+ def check_table_one(self, tableone, matched, score_cols, pihat_threshold, caliper):
# check if better than past runs
- smd_values = np.abs(tableone[tableone.index != 'n']['SMD'].astype(float))
+ smd_values = np.abs(tableone[tableone.index != "n"]["SMD"].astype(float))
num_cols_over_smd = (smd_values >= self.max_smd).sum()
- self.cols_to_fix = smd_values[smd_values >= self.max_smd].sort_values(ascending=False).index.values
+ self.cols_to_fix = (
+ smd_values[smd_values >= self.max_smd]
+ .sort_values(ascending=False)
+ .index.values
+ )
if self.user_col is None:
- num_users_per_group = matched.reset_index().groupby(self.treatment_col)['index'].count().min()
+ num_users_per_group = (
+ matched.reset_index().groupby(self.treatment_col)["index"].count().min()
+ )
else:
- num_users_per_group = matched.groupby(self.treatment_col)[self.user_col].count().min()
- deviations = [np.abs(self.original_stats[col] / matched[matched[self.treatment_col] == 1][col].mean() - 1)
- for col in self.dev_cols_transformations.keys()]
+ num_users_per_group = (
+ matched.groupby(self.treatment_col)[self.user_col].count().min()
+ )
+ deviations = [
+ np.abs(
+ self.original_stats[col]
+ / matched[matched[self.treatment_col] == 1][col].mean()
+ - 1
+ )
+ for col in self.dev_cols_transformations.keys()
+ ]
score = num_cols_over_smd
- score += len([col for col in self.smd_cols if smd_values.loc[col] >= self.max_smd])
- score += np.sum([dev*10*self.dev_factor for dev in deviations])
+ score += len(
+ [col for col in self.smd_cols if smd_values.loc[col] >= self.max_smd]
+ )
+ score += np.sum([dev * 10 * self.dev_factor for dev in deviations])
# check if can be considered as best score
if score < self.best_score and num_users_per_group > self.min_users_per_group:
self.best_score = score
- self.best_params = {'score_cols': score_cols.copy(), 'pihat': pihat_threshold, 'caliper': caliper}
+ self.best_params = {
+ "score_cols": score_cols.copy(),
+ "pihat": pihat_threshold,
+ "caliper": caliper,
+ }
self.best_matched = matched.copy()
if self.verbose:
- logger.info('\tScore: {:.03f} (Best Score: {:.03f})\n'.format(score, self.best_score))
+ logger.info(
+ "\tScore: {:.03f} (Best Score: {:.03f})\n".format(
+ score, self.best_score
+ )
+ )
# check if passes all criteria
- self.pass_all = ((num_users_per_group > self.min_users_per_group) and (num_cols_over_smd == 0) and
- all(dev < self.max_deviation for dev in deviations))
+ self.pass_all = (
+ (num_users_per_group > self.min_users_per_group)
+ and (num_cols_over_smd == 0)
+ and all(dev < self.max_deviation for dev in deviations)
+ )
def match_and_check(self, score_cols, pihat_threshold, caliper):
if self.verbose:
- logger.info('Preparing match for: caliper={:.03f}, '
- 'pihat_threshold={:.03f}, '
- 'score_cols={}'.format(caliper, pihat_threshold, score_cols))
- df_matched = self.single_match(score_cols=score_cols, pihat_threshold=pihat_threshold, caliper=caliper)
- tableone = create_table_one(df_matched, self.treatment_col, self.matching_covariates)
+ logger.info(
+ "Preparing match for: caliper={:.03f}, "
+ "pihat_threshold={:.03f}, "
+ "score_cols={}".format(caliper, pihat_threshold, score_cols)
+ )
+ df_matched = self.single_match(
+ score_cols=score_cols, pihat_threshold=pihat_threshold, caliper=caliper
+ )
+ tableone = create_table_one(
+ df_matched, self.treatment_col, self.matching_covariates
+ )
self.check_table_one(tableone, df_matched, score_cols, pihat_threshold, caliper)
def search_best_match(self, df):
@@ -327,11 +381,13 @@ def search_best_match(self, df):
self.original_stats = {}
for col, trans in self.dev_cols_transformations.items():
- self.original_stats[col] = trans(self.df[self.df[self.treatment_col] == 1][col])
+ self.original_stats[col] = trans(
+ self.df[self.df[self.treatment_col] == 1][col]
+ )
# search best max pihat
if self.verbose:
- logger.info('SEARCHING FOR BEST PIHAT')
+ logger.info("SEARCHING FOR BEST PIHAT")
score_cols = [self.ps_col]
caliper = self.caliper_range[-1]
for pihat_threshold in self.max_pihat_range:
@@ -339,9 +395,9 @@ def search_best_match(self, df):
# search best score_cols
if self.verbose:
- logger.info('SEARCHING FOR BEST SCORE_COLS')
- pihat_threshold = self.best_params['pihat']
- caliper = self.caliper_range[int(self.caliper_range.shape[0]/2)]
+ logger.info("SEARCHING FOR BEST SCORE_COLS")
+ pihat_threshold = self.best_params["pihat"]
+ caliper = self.caliper_range[int(self.caliper_range.shape[0] / 2)]
score_cols = [self.ps_col]
while not self.pass_all:
if len(self.cols_to_fix) == 0:
@@ -354,20 +410,20 @@ def search_best_match(self, df):
# search best caliper
if self.verbose:
- logger.info('SEARCHING FOR BEST CALIPER')
- score_cols = self.best_params['score_cols']
- pihat_threshold = self.best_params['pihat']
+ logger.info("SEARCHING FOR BEST CALIPER")
+ score_cols = self.best_params["score_cols"]
+ pihat_threshold = self.best_params["pihat"]
for caliper in self.caliper_range:
self.match_and_check(score_cols, pihat_threshold, caliper)
# summarize
if self.verbose:
- logger.info('\n-----\nBest params are:\n{}'.format(self.best_params))
+ logger.info("\n-----\nBest params are:\n{}".format(self.best_params))
return self.best_matched
-if __name__ == '__main__':
+if __name__ == "__main__":
from .features import TREATMENT_COL, SCORE_COL, GROUPBY_COL, PROPENSITY_FEATURES
from .features import PROPENSITY_FEATURE_TRANSFORMATIONS, MATCHING_COVARIATES
@@ -375,49 +431,64 @@ def search_best_match(self, df):
from .propensity import ElasticNetPropensityModel
parser = argparse.ArgumentParser()
- parser.add_argument('--input-file', required=True, dest='input_file')
- parser.add_argument('--output-file', required=True, dest='output_file')
- parser.add_argument('--treatment-col', default=TREATMENT_COL, dest='treatment_col')
- parser.add_argument('--groupby-col', default=GROUPBY_COL, dest='groupby_col')
- parser.add_argument('--feature-cols', nargs='+', default=PROPENSITY_FEATURES,
- dest='feature_cols')
- parser.add_argument('--caliper', type=float, default=.2)
- parser.add_argument('--replace', default=False, action='store_true')
- parser.add_argument('--ratio', type=int, default=1)
+ parser.add_argument("--input-file", required=True, dest="input_file")
+ parser.add_argument("--output-file", required=True, dest="output_file")
+ parser.add_argument("--treatment-col", default=TREATMENT_COL, dest="treatment_col")
+ parser.add_argument("--groupby-col", default=GROUPBY_COL, dest="groupby_col")
+ parser.add_argument(
+ "--feature-cols", nargs="+", default=PROPENSITY_FEATURES, dest="feature_cols"
+ )
+ parser.add_argument("--caliper", type=float, default=0.2)
+ parser.add_argument("--replace", default=False, action="store_true")
+ parser.add_argument("--ratio", type=int, default=1)
args = parser.parse_args()
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
- logger.info('Loading data from {}'.format(args.input_file))
+ logger.info("Loading data from {}".format(args.input_file))
df = pd.read_csv(args.input_file)
df[args.treatment_col] = df[args.treatment_col].astype(int)
- logger.info('shape: {}\n{}'.format(df.shape, df.head()))
+ logger.info("shape: {}\n{}".format(df.shape, df.head()))
pm = ElasticNetPropensityModel(random_state=42)
w = df[args.treatment_col].values
- X = load_data(data=df,
- features=args.feature_cols,
- transformations=PROPENSITY_FEATURE_TRANSFORMATIONS)
+ X = load_data(
+ data=df,
+ features=args.feature_cols,
+ transformations=PROPENSITY_FEATURE_TRANSFORMATIONS,
+ )
- logger.info('Scoring with a propensity model: {}'.format(pm))
+ logger.info("Scoring with a propensity model: {}".format(pm))
df[SCORE_COL] = pm.fit_predict(X, w)
- logger.info('Balance before matching:\n{}'.format(create_table_one(data=df,
- treatment_col=args.treatment_col,
- features=MATCHING_COVARIATES)))
- logger.info('Matching based on the propensity score with the nearest neighbor model')
- psm = NearestNeighborMatch(replace=args.replace,
- ratio=args.ratio,
- random_state=42)
- matched = psm.match_by_group(data=df,
- treatment_col=args.treatment_col,
- score_cols=[SCORE_COL],
- groupby_col=args.groupby_col)
- logger.info('shape: {}\n{}'.format(matched.shape, matched.head()))
-
- logger.info('Balance after matching:\n{}'.format(create_table_one(data=matched,
- treatment_col=args.treatment_col,
- features=MATCHING_COVARIATES)))
+ logger.info(
+ "Balance before matching:\n{}".format(
+ create_table_one(
+ data=df, treatment_col=args.treatment_col, features=MATCHING_COVARIATES
+ )
+ )
+ )
+ logger.info(
+ "Matching based on the propensity score with the nearest neighbor model"
+ )
+ psm = NearestNeighborMatch(replace=args.replace, ratio=args.ratio, random_state=42)
+ matched = psm.match_by_group(
+ data=df,
+ treatment_col=args.treatment_col,
+ score_cols=[SCORE_COL],
+ groupby_col=args.groupby_col,
+ )
+ logger.info("shape: {}\n{}".format(matched.shape, matched.head()))
+
+ logger.info(
+ "Balance after matching:\n{}".format(
+ create_table_one(
+ data=matched,
+ treatment_col=args.treatment_col,
+ features=MATCHING_COVARIATES,
+ )
+ )
+ )
matched.to_csv(args.output_file, index=False)
- logger.info('Matched data saved as {}'.format(args.output_file))
+ logger.info("Matched data saved as {}".format(args.output_file))
diff --git a/causalml/metrics/__init__.py b/causalml/metrics/__init__.py
index d635e75a..0bc9d187 100644
--- a/causalml/metrics/__init__.py
+++ b/causalml/metrics/__init__.py
@@ -1,7 +1,34 @@
from .classification import roc_auc_score, logloss, classification_metrics # noqa
-from .regression import ape, mape, mae, rmse, r2_score, gini, smape, regression_metrics # noqa
-from .visualize import plot, plot_gain, plot_lift, plot_qini, plot_tmlegain, plot_tmleqini # noqa
-from .visualize import get_cumgain, get_cumlift, get_qini, get_tmlegain, get_tmleqini # noqa
-from .visualize import auuc_score, qini_score # noqa
-from .sensitivity import Sensitivity, SensitivityPlaceboTreatment # noqa
-from .sensitivity import SensitivityRandomCause, SensitivityRandomReplace, SensitivitySubsetData, SensitivitySelectionBias # noqa
+from .regression import (
+ ape,
+ mape,
+ mae,
+ rmse,
+ r2_score,
+ gini,
+ smape,
+ regression_metrics,
+) # noqa
+from .visualize import (
+ plot,
+ plot_gain,
+ plot_lift,
+ plot_qini,
+ plot_tmlegain,
+ plot_tmleqini,
+) # noqa
+from .visualize import (
+ get_cumgain,
+ get_cumlift,
+ get_qini,
+ get_tmlegain,
+ get_tmleqini,
+) # noqa
+from .visualize import auuc_score, qini_score # noqa
+from .sensitivity import Sensitivity, SensitivityPlaceboTreatment # noqa
+from .sensitivity import (
+ SensitivityRandomCause,
+ SensitivityRandomReplace,
+ SensitivitySubsetData,
+ SensitivitySelectionBias,
+) # noqa
diff --git a/causalml/metrics/classification.py b/causalml/metrics/classification.py
index 8e495036..23641a13 100644
--- a/causalml/metrics/classification.py
+++ b/causalml/metrics/classification.py
@@ -5,7 +5,7 @@
from .regression import regression_metrics
-logger = logging.getLogger('causalml')
+logger = logging.getLogger("causalml")
def logloss(y, p):
@@ -22,7 +22,9 @@ def logloss(y, p):
return log_loss(y, p)
-def classification_metrics(y, p, w=None, metrics={'AUC': roc_auc_score, 'Log Loss': logloss}):
+def classification_metrics(
+ y, p, w=None, metrics={"AUC": roc_auc_score, "Log Loss": logloss}
+):
"""Log metrics for classifiers.
Args:
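
For reference, a quick sketch of how the classification metrics in this file are called; the labels and scores below are synthetic and for illustration only.

```python
import numpy as np
from causalml.metrics import logloss, classification_metrics

# Made-up labels and scores, for illustration only.
rng = np.random.RandomState(1)
y = rng.binomial(1, 0.4, 1000)
p = np.clip(0.7 * y + rng.uniform(0.0, 0.5, 1000), 0.01, 0.99)  # imperfect scores

print(logloss(y, p))          # binary cross-entropy of the scores
classification_metrics(y, p)  # logs AUC and Log Loss through the 'causalml' logger
```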
diff --git a/causalml/metrics/const.py b/causalml/metrics/const.py
index f00193e7..abca9d3e 100644
--- a/causalml/metrics/const.py
+++ b/causalml/metrics/const.py
@@ -1 +1 @@
-EPS = 1e-15
\ No newline at end of file
+EPS = 1e-15
diff --git a/causalml/metrics/regression.py b/causalml/metrics/regression.py
index c4d164af..497a01d9 100644
--- a/causalml/metrics/regression.py
+++ b/causalml/metrics/regression.py
@@ -1,13 +1,13 @@
import logging
import numpy as np
from sklearn.metrics import mean_squared_error as mse
-from sklearn.metrics import mean_absolute_error as mae # noqa
-from sklearn.metrics import r2_score # noqa
+from sklearn.metrics import mean_absolute_error as mae # noqa
+from sklearn.metrics import r2_score # noqa
from .const import EPS
-logger = logging.getLogger('causalml')
+logger = logging.getLogger("causalml")
def ape(y, p):
@@ -47,7 +47,7 @@ def smape(y, p):
Returns:
e (numpy.float64): sMAPE
"""
- return 2. * np.mean(np.abs(y - p) / (np.abs(y) + np.abs(p)))
+ return 2.0 * np.mean(np.abs(y - p) / (np.abs(y) + np.abs(p)))
def rmse(y, p):
@@ -91,7 +91,7 @@ def gini(y, p):
# get Lorenz curves
l_true = np.cumsum(true_order) / np.sum(true_order)
l_pred = np.cumsum(pred_order) / np.sum(pred_order)
- l_ones = np.linspace(1/n_samples, 1, n_samples)
+ l_ones = np.linspace(1 / n_samples, 1, n_samples)
# get Gini coefficients (area between curves)
g_true = np.sum(l_ones - l_true)
@@ -101,7 +101,9 @@ def gini(y, p):
return g_pred / g_true
-def regression_metrics(y, p, w=None, metrics={'RMSE': rmse, 'sMAPE': smape, 'Gini': gini}):
+def regression_metrics(
+ y, p, w=None, metrics={"RMSE": rmse, "sMAPE": smape, "Gini": gini}
+):
"""Log metrics for regressors.
Args:
@@ -119,7 +121,7 @@ def regression_metrics(y, p, w=None, metrics={'RMSE': rmse, 'sMAPE': smape, 'Gin
assert y.shape[0] == w.shape[0]
if w.dtype != bool:
w = w == 1
- logger.info('{:>8s} (Control): {:10.4f}'.format(name, func(y[~w], p[~w])))
- logger.info('{:>8s} (Treatment): {:10.4f}'.format(name, func(y[w], p[w])))
+ logger.info("{:>8s} (Control): {:10.4f}".format(name, func(y[~w], p[~w])))
+ logger.info("{:>8s} (Treatment): {:10.4f}".format(name, func(y[w], p[w])))
else:
- logger.info('{:>8s}: {:10.4f}'.format(name, func(y, p)))
+ logger.info("{:>8s}: {:10.4f}".format(name, func(y, p)))
diff --git a/causalml/metrics/sensitivity.py b/causalml/metrics/sensitivity.py
index d05bb402..ff091d19 100644
--- a/causalml/metrics/sensitivity.py
+++ b/causalml/metrics/sensitivity.py
@@ -5,7 +5,7 @@
import matplotlib.pyplot as plt
from importlib import import_module
-logger = logging.getLogger('sensitivity')
+logger = logging.getLogger("sensitivity")
def one_sided(alpha, p, treatment):
@@ -76,14 +76,23 @@ def alignment_att(alpha, p, treatment):
class Sensitivity(object):
- """ A Sensitivity Check class to support Placebo Treatment, Irrelevant Additional Confounder
+ """A Sensitivity Check class to support Placebo Treatment, Irrelevant Additional Confounder
and Subset validation refutation methods to verify causal inference.
Reference: https://github.com/microsoft/dowhy/blob/master/dowhy/causal_refuters/
"""
- def __init__(self, df, inference_features, p_col, treatment_col, outcome_col,
- learner, *args, **kwargs):
+ def __init__(
+ self,
+ df,
+ inference_features,
+ p_col,
+ treatment_col,
+ outcome_col,
+ learner,
+ *args,
+ **kwargs,
+ ):
"""Initialize.
Args:
@@ -135,13 +144,20 @@ def get_ate_ci(self, X, p, treatment, y):
learner = self.learner
from ..inference.meta.tlearner import BaseTLearner
+
if isinstance(learner, BaseTLearner):
- ate, ate_lower, ate_upper = learner.estimate_ate(X=X, treatment=treatment, y=y)
+ ate, ate_lower, ate_upper = learner.estimate_ate(
+ X=X, treatment=treatment, y=y
+ )
else:
try:
- ate, ate_lower, ate_upper = learner.estimate_ate(X=X, p=p, treatment=treatment, y=y)
+ ate, ate_lower, ate_upper = learner.estimate_ate(
+ X=X, p=p, treatment=treatment, y=y
+ )
except TypeError:
- ate, ate_lower, ate_upper = learner.estimate_ate(X=X, treatment=treatment, y=y, return_ci=True)
+ ate, ate_lower, ate_upper = learner.estimate_ate(
+ X=X, treatment=treatment, y=y, return_ci=True
+ )
return ate[0], ate_lower[0], ate_upper[0]
@staticmethod
@@ -153,18 +169,29 @@ def get_class_object(method_name, *args, **kwargs):
(class): Sensitivy Class
"""
- method_list = ['Placebo Treatment', 'Random Cause', 'Subset Data', 'Random Replace', 'Selection Bias']
- class_name = 'Sensitivity' + method_name.replace(' ', '')
+ method_list = [
+ "Placebo Treatment",
+ "Random Cause",
+ "Subset Data",
+ "Random Replace",
+ "Selection Bias",
+ ]
+ class_name = "Sensitivity" + method_name.replace(" ", "")
try:
- getattr(import_module('causalml.metrics.sensitivity'), class_name)
- return getattr(import_module('causalml.metrics.sensitivity'), class_name)
+ getattr(import_module("causalml.metrics.sensitivity"), class_name)
+ return getattr(import_module("causalml.metrics.sensitivity"), class_name)
except AttributeError:
- raise AttributeError('{} is not an existing method for sensitiviy analysis.'.format(method_name) +
- ' Select one of {}'.format(method_list))
-
- def sensitivity_analysis(self, methods, sample_size=None,
- confound='one_sided', alpha_range=None):
+ raise AttributeError(
+ "{} is not an existing method for sensitiviy analysis.".format(
+ method_name
+ )
+ + " Select one of {}".format(method_list)
+ )
+
+ def sensitivity_analysis(
+ self, methods, sample_size=None, confound="one_sided", alpha_range=None
+ ):
"""Return the sensitivity data by different method
Args:
@@ -181,8 +208,8 @@ def sensitivity_analysis(self, methods, sample_size=None,
"""
if alpha_range is None:
y = self.df[self.outcome_col]
- iqr = y.quantile(.75) - y.quantile(.25)
- alpha_range = np.linspace(-iqr/2, iqr/2, 11)
+ iqr = y.quantile(0.75) - y.quantile(0.25)
+ alpha_range = np.linspace(-iqr / 2, iqr / 2, 11)
if 0 not in alpha_range:
alpha_range = np.append(alpha_range, 0)
else:
@@ -190,14 +217,25 @@ def sensitivity_analysis(self, methods, sample_size=None,
alpha_range.sort()
- summary_df = pd.DataFrame(columns=['Method', 'ATE', 'New ATE', 'New ATE LB', 'New ATE UB'])
+ summary_df = pd.DataFrame(
+ columns=["Method", "ATE", "New ATE", "New ATE LB", "New ATE UB"]
+ )
for method in methods:
sens = self.get_class_object(method)
- sens = sens(self.df, self.inference_features, self.p_col, self.treatment_col, self.outcome_col,
- self.learner, sample_size=sample_size, confound=confound, alpha_range=alpha_range)
-
- if method == 'Subset Data':
- method = method + '(sample size @{})'.format(sample_size)
+ sens = sens(
+ self.df,
+ self.inference_features,
+ self.p_col,
+ self.treatment_col,
+ self.outcome_col,
+ self.learner,
+ sample_size=sample_size,
+ confound=confound,
+ alpha_range=alpha_range,
+ )
+
+ if method == "Subset Data":
+ method = method + "(sample size @{})".format(sample_size)
sens_df = sens.summary(method=method)
summary_df = summary_df.append(sens_df)
@@ -223,9 +261,16 @@ def summary(self, method):
ate = preds.mean()
ate_new, ate_new_lower, ate_new_upper = self.sensitivity_estimate()
- sensitivity_summary = pd.DataFrame([method_name, ate,
- ate_new, ate_new_lower, ate_new_upper]).T
- sensitivity_summary.columns = ['Method', 'ATE', 'New ATE', 'New ATE LB', 'New ATE UB']
+ sensitivity_summary = pd.DataFrame(
+ [method_name, ate, ate_new, ate_new_lower, ate_new_upper]
+ ).T
+ sensitivity_summary.columns = [
+ "Method",
+ "ATE",
+ "New ATE",
+ "New ATE LB",
+ "New ATE UB",
+ ]
return sensitivity_summary
def sensitivity_estimate(self):
@@ -233,8 +278,7 @@ def sensitivity_estimate(self):
class SensitivityPlaceboTreatment(Sensitivity):
- """Replaces the treatment variable with a new variable randomly generated.
- """
+ """Replaces the treatment variable with a new variable randomly generated."""
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
@@ -259,8 +303,7 @@ def sensitivity_estimate(self):
class SensitivityRandomCause(Sensitivity):
- """Adds an irrelevant random covariate to the dataframe.
- """
+ """Adds an irrelevant random covariate to the dataframe."""
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
@@ -280,22 +323,24 @@ def sensitivity_estimate(self):
class SensitivityRandomReplace(Sensitivity):
- """Replaces a random covariate with an irrelevant variable.
- """
+ """Replaces a random covariate with an irrelevant variable."""
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
- if 'replaced_feature' not in kwargs:
+ if "replaced_feature" not in kwargs:
replaced_feature_index = np.random.randint(len(self.inference_features))
self.replaced_feature = self.inference_features[replaced_feature_index]
else:
self.replaced_feature = kwargs["replaced_feature"]
def sensitivity_estimate(self):
- """Replaces a random covariate with an irrelevant variable.
- """
+ """Replaces a random covariate with an irrelevant variable."""
- logger.info('Replace feature {} with an random irrelevant variable'.format(self.replaced_feature))
+ logger.info(
+ "Replace feature {} with an random irrelevant variable".format(
+ self.replaced_feature
+ )
+ )
df_new = self.df.copy()
num_rows = self.df.shape[0]
df_new[self.replaced_feature] = np.random.randn(num_rows)
@@ -305,18 +350,19 @@ def sensitivity_estimate(self):
treatment_new = df_new[self.treatment_col].values
y_new = df_new[self.outcome_col].values
- ate_new, ate_new_lower, ate_new_upper = self.get_ate_ci(X_new, p_new, treatment_new, y_new)
+ ate_new, ate_new_lower, ate_new_upper = self.get_ate_ci(
+ X_new, p_new, treatment_new, y_new
+ )
return ate_new, ate_new_lower, ate_new_upper
class SensitivitySubsetData(Sensitivity):
- """Takes a random subset of size sample_size of the data.
- """
+ """Takes a random subset of size sample_size of the data."""
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.sample_size = kwargs["sample_size"]
- assert (self.sample_size is not None)
+ assert self.sample_size is not None
def sensitivity_estimate(self):
df_new = self.df.sample(frac=self.sample_size).copy()
@@ -326,7 +372,9 @@ def sensitivity_estimate(self):
treatment_new = df_new[self.treatment_col].values
y_new = df_new[self.outcome_col].values
- ate_new, ate_new_lower, ate_new_upper = self.get_ate_ci(X_new, p_new, treatment_new, y_new)
+ ate_new, ate_new_lower, ate_new_upper = self.get_ate_ci(
+ X_new, p_new, treatment_new, y_new
+ )
return ate_new, ate_new_lower, ate_new_upper
@@ -342,8 +390,14 @@ class SensitivitySelectionBias(Sensitivity):
"""
- def __init__(self, *args, confound='one_sided', alpha_range=None,
- sensitivity_features=None, **kwargs):
+ def __init__(
+ self,
+ *args,
+ confound="one_sided",
+ alpha_range=None,
+ sensitivity_features=None,
+ **kwargs,
+ ):
super().__init__(*args, **kwargs)
"""Initialize.
@@ -353,17 +407,21 @@ def __init__(self, *args, confound='one_sided', alpha_range=None,
sensitivity_features (list of str): ): a list of columns that to check each individual partial r-square
"""
- logger.info('Only works for linear outcome models right now. Check back soon.')
- confounding_functions = {'one_sided': one_sided,
- 'alignment': alignment,
- 'one_sided_att': one_sided_att,
- 'alignment_att': alignment_att}
+ logger.info("Only works for linear outcome models right now. Check back soon.")
+ confounding_functions = {
+ "one_sided": one_sided,
+ "alignment": alignment,
+ "one_sided_att": one_sided_att,
+ "alignment_att": alignment_att,
+ }
try:
confound_func = confounding_functions[confound]
except KeyError:
- raise NotImplementedError(f'Confounding function, {confound} is not implemented. \
- Use one of {confounding_functions.keys()}')
+ raise NotImplementedError(
+ f"Confounding function, {confound} is not implemented. \
+ Use one of {confounding_functions.keys()}"
+ )
self.confound = confound_func
@@ -374,8 +432,8 @@ def __init__(self, *args, confound='one_sided', alpha_range=None,
if alpha_range is None:
y = self.df[self.outcome_col]
- iqr = y.quantile(.75) - y.quantile(.25)
- self.alpha_range = np.linspace(-iqr/2, iqr/2, 11)
+ iqr = y.quantile(0.75) - y.quantile(0.25)
+ self.alpha_range = np.linspace(-iqr / 2, iqr / 2, 11)
if 0 not in self.alpha_range:
self.alpha_range = np.append(self.alpha_range, 0)
else:
@@ -397,17 +455,17 @@ def causalsens(self):
sens_df = pd.DataFrame()
for a in alpha_range:
sens = defaultdict(list)
- sens['alpha'] = a
+ sens["alpha"] = a
adj = confound(a, p, treatment)
preds_adj = y - adj
s_preds = self.get_prediction(X, p, treatment, preds_adj)
ate, ate_lb, ate_ub = self.get_ate_ci(X, p, treatment, preds_adj)
s_preds_residul = preds_adj - s_preds
- sens['rsqs'] = a**2*np.var(treatment)/np.var(s_preds_residul)
- sens['New ATE'] = ate
- sens['New ATE LB'] = ate_lb
- sens['New ATE UB'] = ate_ub
+ sens["rsqs"] = a**2 * np.var(treatment) / np.var(s_preds_residul)
+ sens["New ATE"] = ate
+ sens["New ATE LB"] = ate_lb
+ sens["New ATE UB"] = ate_ub
sens_df = sens_df.append(pd.DataFrame(sens, index=[0]))
rss = np.sum(np.square(y - preds))
@@ -417,14 +475,14 @@ def causalsens(self):
X_new = df_new[self.inference_features].drop(feature, axis=1).copy()
y_new_preds = self.get_prediction(X_new, p, treatment, y)
rss_new = np.sum(np.square(y - y_new_preds))
- partial_rsqs.append(((rss_new - rss)/rss))
+ partial_rsqs.append(((rss_new - rss) / rss))
partial_rsqs_df = pd.DataFrame([self.sensitivity_features, partial_rsqs]).T
- partial_rsqs_df.columns = ['feature', 'partial_rsqs']
+ partial_rsqs_df.columns = ["feature", "partial_rsqs"]
return sens_df, partial_rsqs_df
- def summary(self, method='Selection Bias'):
+ def summary(self, method="Selection Bias"):
"""Summary report for Selection Bias Method
Args:
method_name (str): sensitivity analysis method
@@ -434,14 +492,22 @@ def summary(self, method='Selection Bias'):
method_name = method
sensitivity_summary = self.causalsens()[0]
- sensitivity_summary['Method'] = [method_name + ' (alpha@' + str(round(i, 5)) + ', with r-sqaure:'
- for i in sensitivity_summary.alpha]
- sensitivity_summary['Method'] = sensitivity_summary['Method'] + sensitivity_summary['rsqs'].round(5).astype(str)
- sensitivity_summary['ATE'] = sensitivity_summary[sensitivity_summary.alpha == 0]['New ATE']
- return sensitivity_summary[['Method', 'ATE', 'New ATE', 'New ATE LB', 'New ATE UB']]
+ sensitivity_summary["Method"] = [
+ method_name + " (alpha@" + str(round(i, 5)) + ", with r-sqaure:"
+ for i in sensitivity_summary.alpha
+ ]
+ sensitivity_summary["Method"] = sensitivity_summary[
+ "Method"
+ ] + sensitivity_summary["rsqs"].round(5).astype(str)
+ sensitivity_summary["ATE"] = sensitivity_summary[
+ sensitivity_summary.alpha == 0
+ ]["New ATE"]
+ return sensitivity_summary[
+ ["Method", "ATE", "New ATE", "New ATE LB", "New ATE UB"]
+ ]
@staticmethod
- def plot(sens_df, partial_rsqs_df=None, type='raw', ci=False, partial_rsqs=False):
+ def plot(sens_df, partial_rsqs_df=None, type="raw", ci=False, partial_rsqs=False):
"""Plot the results of a sensitivity analysis against unmeasured
Args:
sens_df (pandas.DataFrame): a data frame output from causalsens
@@ -449,48 +515,70 @@ def plot(sens_df, partial_rsqs_df=None, type='raw', ci=False, partial_rsqs=False
type (str, optional): the type of plot to draw, 'raw' or 'r.squared' are supported
ci (bool, optional): whether plot confidence intervals
partial_rsqs (bool, optional): whether plot partial rsquare results
- """
+ """
- if type == 'raw' and not ci:
+ if type == "raw" and not ci:
fig, ax = plt.subplots()
- y_max = round(sens_df['New ATE UB'].max()*1.1, 4)
- y_min = round(sens_df['New ATE LB'].min()*0.9, 4)
- x_max = round(sens_df.alpha.max()*1.1, 4)
- x_min = round(sens_df.alpha.min()*0.9, 4)
+ y_max = round(sens_df["New ATE UB"].max() * 1.1, 4)
+ y_min = round(sens_df["New ATE LB"].min() * 0.9, 4)
+ x_max = round(sens_df.alpha.max() * 1.1, 4)
+ x_min = round(sens_df.alpha.min() * 0.9, 4)
plt.ylim(y_min, y_max)
plt.xlim(x_min, x_max)
- ax.plot(sens_df.alpha, sens_df['New ATE'])
- elif type == 'raw' and ci:
+ ax.plot(sens_df.alpha, sens_df["New ATE"])
+ elif type == "raw" and ci:
fig, ax = plt.subplots()
- y_max = round(sens_df['New ATE UB'].max()*1.1, 4)
- y_min = round(sens_df['New ATE LB'].min()*0.9, 4)
- x_max = round(sens_df.alpha.max()*1.1, 4)
- x_min = round(sens_df.alpha.min()*0.9, 4)
+ y_max = round(sens_df["New ATE UB"].max() * 1.1, 4)
+ y_min = round(sens_df["New ATE LB"].min() * 0.9, 4)
+ x_max = round(sens_df.alpha.max() * 1.1, 4)
+ x_min = round(sens_df.alpha.min() * 0.9, 4)
plt.ylim(y_min, y_max)
plt.xlim(x_min, x_max)
- ax.fill_between(sens_df.alpha, sens_df['New ATE LB'], sens_df['New ATE UB'], color='gray', alpha=0.5)
- ax.plot(sens_df.alpha, sens_df['New ATE'])
- elif type == 'r.squared' and ci:
+ ax.fill_between(
+ sens_df.alpha,
+ sens_df["New ATE LB"],
+ sens_df["New ATE UB"],
+ color="gray",
+ alpha=0.5,
+ )
+ ax.plot(sens_df.alpha, sens_df["New ATE"])
+ elif type == "r.squared" and ci:
fig, ax = plt.subplots()
- y_max = round(sens_df['New ATE UB'].max()*1.1, 4)
- y_min = round(sens_df['New ATE LB'].min()*0.9, 4)
+ y_max = round(sens_df["New ATE UB"].max() * 1.1, 4)
+ y_min = round(sens_df["New ATE LB"].min() * 0.9, 4)
plt.ylim(y_min, y_max)
- ax.fill_between(sens_df.rsqs, sens_df['New ATE LB'], sens_df['New ATE UB'], color='gray', alpha=0.5)
- ax.plot(sens_df.rsqs, sens_df['New ATE'])
+ ax.fill_between(
+ sens_df.rsqs,
+ sens_df["New ATE LB"],
+ sens_df["New ATE UB"],
+ color="gray",
+ alpha=0.5,
+ )
+ ax.plot(sens_df.rsqs, sens_df["New ATE"])
if partial_rsqs:
- plt.scatter(partial_rsqs_df.partial_rsqs,
- list(sens_df[sens_df.alpha == 0]['New ATE']) * partial_rsqs_df.shape[0],
- marker='x', color="red", linewidth=10)
- elif type == 'r.squared' and not ci:
+ plt.scatter(
+ partial_rsqs_df.partial_rsqs,
+ list(sens_df[sens_df.alpha == 0]["New ATE"])
+ * partial_rsqs_df.shape[0],
+ marker="x",
+ color="red",
+ linewidth=10,
+ )
+ elif type == "r.squared" and not ci:
fig, ax = plt.subplots()
- y_max = round(sens_df['New ATE UB'].max()*1.1, 4)
- y_min = round(sens_df['New ATE LB'].min()*0.9, 4)
+ y_max = round(sens_df["New ATE UB"].max() * 1.1, 4)
+ y_min = round(sens_df["New ATE LB"].min() * 0.9, 4)
plt.ylim(y_min, y_max)
- plt.plot(sens_df.rsqs, sens_df['New ATE'])
+ plt.plot(sens_df.rsqs, sens_df["New ATE"])
if partial_rsqs:
- plt.scatter(partial_rsqs_df.partial_rsqs,
- list(sens_df[sens_df.alpha == 0]['New ATE']) * partial_rsqs_df.shape[0],
- marker='x', color="red", linewidth=10)
+ plt.scatter(
+ partial_rsqs_df.partial_rsqs,
+ list(sens_df[sens_df.alpha == 0]["New ATE"])
+ * partial_rsqs_df.shape[0],
+ marker="x",
+ color="red",
+ linewidth=10,
+ )
@staticmethod
def partial_rsqs_confounding(sens_df, feature_name, partial_rsqs_value, range=0.01):
@@ -506,16 +594,27 @@ def partial_rsqs_confounding(sens_df, feature_name, partial_rsqs_value, range=0.
rsqs_dict = []
for i in sens_df.rsqs:
- if partial_rsqs_value - partial_rsqs_value*range < i < partial_rsqs_value + partial_rsqs_value*range:
+ if (
+ partial_rsqs_value - partial_rsqs_value * range
+ < i
+ < partial_rsqs_value + partial_rsqs_value * range
+ ):
rsqs_dict.append(i)
if rsqs_dict:
confounding_min = sens_df[sens_df.rsqs.isin(rsqs_dict)].alpha.min()
confounding_max = sens_df[sens_df.rsqs.isin(rsqs_dict)].alpha.max()
- logger.info('Only works for linear outcome models right now. Check back soon.')
- logger.info('For feature {} with partial rsquare {} confounding amount with possible values: {}, {}'.format(
- feature_name, partial_rsqs_value, confounding_min, confounding_max))
+ logger.info(
+ "Only works for linear outcome models right now. Check back soon."
+ )
+ logger.info(
+ "For feature {} with partial rsquare {} confounding amount with possible values: {}, {}".format(
+ feature_name, partial_rsqs_value, confounding_min, confounding_max
+ )
+ )
return [confounding_min, confounding_max]
else:
- logger.info('Cannot find correponding rsquare value within the range for input, please edit confounding', 'values vector or use a larger range and try again')
-
+ logger.info(
+ "Cannot find correponding rsquare value within the range for input, please edit confounding",
+ "values vector or use a larger range and try again",
+ )
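
For reference, a hedged usage sketch of the `Sensitivity` API whose formatting changes end here. The synthetic data, the column names, and the choice of `BaseXRegressor(LinearRegression())` as the learner are assumptions for illustration; any CausalML meta-learner exposing `estimate_ate()` should work.

```python
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from causalml.inference.meta import BaseXRegressor
from causalml.metrics.sensitivity import Sensitivity

# Synthetic data and column names are made up; the learner choice is an assumption.
rng = np.random.RandomState(0)
n = 2000
df = pd.DataFrame({"x1": rng.normal(size=n), "x2": rng.normal(size=n)})
df["treatment"] = rng.binomial(1, 0.5, n)
df["pihat"] = 0.5  # known propensity in this randomized synthetic setup
df["outcome"] = df["x1"] + 0.5 * df["treatment"] + rng.normal(scale=0.1, size=n)

sens = Sensitivity(
    df=df,
    inference_features=["x1", "x2"],
    p_col="pihat",
    treatment_col="treatment",
    outcome_col="outcome",
    learner=BaseXRegressor(learner=LinearRegression()),
)
summary = sens.sensitivity_analysis(
    methods=["Placebo Treatment", "Random Cause", "Subset Data"], sample_size=0.5
)
print(summary)
```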
diff --git a/causalml/metrics/visualize.py b/causalml/metrics/visualize.py
index 72fcdb75..3b4fef48 100644
--- a/causalml/metrics/visualize.py
+++ b/causalml/metrics/visualize.py
@@ -7,14 +7,14 @@
from ..inference.meta.tmle import TMLELearner
-plt.style.use('fivethirtyeight')
+plt.style.use("fivethirtyeight")
sns.set_palette("Paired")
-RANDOM_COL = 'Random'
+RANDOM_COL = "Random"
-logger = logging.getLogger('causalml')
+logger = logging.getLogger("causalml")
-def plot(df, kind='gain', tmle=False, n=100, figsize=(8, 8), *args, **kwarg):
+def plot(df, kind="gain", tmle=False, n=100, figsize=(8, 8), *args, **kwarg):
"""Plot one of the lift/gain/Qini charts of model estimates.
A factory method for `plot_lift()`, `plot_gain()`, `plot_qini()`, `plot_tmlegain()` and `plot_tmleqini()`.
@@ -25,16 +25,19 @@ def plot(df, kind='gain', tmle=False, n=100, figsize=(8, 8), *args, **kwarg):
kind (str, optional): the kind of plot to draw. 'lift', 'gain', and 'qini' are supported.
n (int, optional): the number of samples to be used for plotting.
"""
- catalog = {'lift': get_cumlift,
- 'gain': get_cumgain,
- 'qini': get_qini}
+ catalog = {"lift": get_cumlift, "gain": get_cumgain, "qini": get_qini}
- assert kind in catalog.keys(), '{} plot is not implemented. Select one of {}'.format(kind, catalog.keys())
+ assert (
+ kind in catalog.keys()
+ ), "{} plot is not implemented. Select one of {}".format(kind, catalog.keys())
if tmle:
- ci_catalog = {'gain': plot_tmlegain,
- 'qini': plot_tmleqini}
- assert kind in ci_catalog.keys(), '{} plot is not implemented. Select one of {}'.format(kind, ci_catalog.keys())
+ ci_catalog = {"gain": plot_tmlegain, "qini": plot_tmleqini}
+ assert (
+ kind in ci_catalog.keys()
+ ), "{} plot is not implemented. Select one of {}".format(
+ kind, ci_catalog.keys()
+ )
ci_catalog[kind](df, *args, **kwarg)
else:
@@ -44,12 +47,13 @@ def plot(df, kind='gain', tmle=False, n=100, figsize=(8, 8), *args, **kwarg):
df = df.iloc[np.linspace(0, df.index[-1], n, endpoint=True)]
df.plot(figsize=figsize)
- plt.xlabel('Population')
- plt.ylabel('{}'.format(kind.title()))
+ plt.xlabel("Population")
+ plt.ylabel("{}".format(kind.title()))
-def get_cumlift(df, outcome_col='y', treatment_col='w', treatment_effect_col='tau',
- random_seed=42):
+def get_cumlift(
+ df, outcome_col="y", treatment_col="w", treatment_effect_col="tau", random_seed=42
+):
"""Get average uplifts of model estimates in cumulative population.
If the true treatment effect is provided (e.g. in synthetic data), it's calculated
@@ -74,19 +78,25 @@ def get_cumlift(df, outcome_col='y', treatment_col='w', treatment_effect_col='ta
(pandas.DataFrame): average uplifts of model estimates in cumulative population
"""
- assert ((outcome_col in df.columns) and (treatment_col in df.columns) or
- treatment_effect_col in df.columns)
+ assert (
+ (outcome_col in df.columns)
+ and (treatment_col in df.columns)
+ or treatment_effect_col in df.columns
+ )
df = df.copy()
np.random.seed(random_seed)
random_cols = []
for i in range(10):
- random_col = '__random_{}__'.format(i)
+ random_col = "__random_{}__".format(i)
df[random_col] = np.random.rand(df.shape[0])
random_cols.append(random_col)
- model_names = [x for x in df.columns if x not in [outcome_col, treatment_col,
- treatment_effect_col]]
+ model_names = [
+ x
+ for x in df.columns
+ if x not in [outcome_col, treatment_col, treatment_effect_col]
+ ]
lift = []
for i, col in enumerate(model_names):
@@ -100,15 +110,22 @@ def get_cumlift(df, outcome_col='y', treatment_col='w', treatment_effect_col='ta
else:
# When treatment_effect_col is not given, use outcome_col and treatment_col
# to calculate the average treatment_effects of cumulative population.
- sorted_df['cumsum_tr'] = sorted_df[treatment_col].cumsum()
- sorted_df['cumsum_ct'] = sorted_df.index.values - sorted_df['cumsum_tr']
- sorted_df['cumsum_y_tr'] = (sorted_df[outcome_col] * sorted_df[treatment_col]).cumsum()
- sorted_df['cumsum_y_ct'] = (sorted_df[outcome_col] * (1 - sorted_df[treatment_col])).cumsum()
-
- lift.append(sorted_df['cumsum_y_tr'] / sorted_df['cumsum_tr'] - sorted_df['cumsum_y_ct'] / sorted_df['cumsum_ct'])
-
- lift = pd.concat(lift, join='inner', axis=1)
- lift.loc[0] = np.zeros((lift.shape[1], ))
+ sorted_df["cumsum_tr"] = sorted_df[treatment_col].cumsum()
+ sorted_df["cumsum_ct"] = sorted_df.index.values - sorted_df["cumsum_tr"]
+ sorted_df["cumsum_y_tr"] = (
+ sorted_df[outcome_col] * sorted_df[treatment_col]
+ ).cumsum()
+ sorted_df["cumsum_y_ct"] = (
+ sorted_df[outcome_col] * (1 - sorted_df[treatment_col])
+ ).cumsum()
+
+ lift.append(
+ sorted_df["cumsum_y_tr"] / sorted_df["cumsum_tr"]
+ - sorted_df["cumsum_y_ct"] / sorted_df["cumsum_ct"]
+ )
+
+ lift = pd.concat(lift, join="inner", axis=1)
+ lift.loc[0] = np.zeros((lift.shape[1],))
lift = lift.sort_index().interpolate()
lift.columns = model_names
@@ -118,8 +135,14 @@ def get_cumlift(df, outcome_col='y', treatment_col='w', treatment_effect_col='ta
return lift
-def get_cumgain(df, outcome_col='y', treatment_col='w', treatment_effect_col='tau',
- normalize=False, random_seed=42):
+def get_cumgain(
+ df,
+ outcome_col="y",
+ treatment_col="w",
+ treatment_effect_col="tau",
+ normalize=False,
+ random_seed=42,
+):
"""Get cumulative gains of model estimates in population.
If the true treatment effect is provided (e.g. in synthetic data), it's calculated
@@ -145,7 +168,9 @@ def get_cumgain(df, outcome_col='y', treatment_col='w', treatment_effect_col='ta
(pandas.DataFrame): cumulative gains of model estimates in population
"""
- lift = get_cumlift(df, outcome_col, treatment_col, treatment_effect_col, random_seed)
+ lift = get_cumlift(
+ df, outcome_col, treatment_col, treatment_effect_col, random_seed
+ )
# cumulative gain = cumulative lift x (# of population)
gain = lift.mul(lift.index.values, axis=0)
@@ -156,8 +181,14 @@ def get_cumgain(df, outcome_col='y', treatment_col='w', treatment_effect_col='ta
return gain
-def get_qini(df, outcome_col='y', treatment_col='w', treatment_effect_col='tau',
- normalize=False, random_seed=42):
+def get_qini(
+ df,
+ outcome_col="y",
+ treatment_col="w",
+ treatment_effect_col="tau",
+ normalize=False,
+ random_seed=42,
+):
"""Get Qini of model estimates in population.
If the true treatment effect is provided (e.g. in synthetic data), it's calculated
@@ -182,43 +213,52 @@ def get_qini(df, outcome_col='y', treatment_col='w', treatment_effect_col='tau',
Returns:
(pandas.DataFrame): cumulative gains of model estimates in population
"""
- assert ((outcome_col in df.columns) and (treatment_col in df.columns) or
- treatment_effect_col in df.columns)
+ assert (
+ (outcome_col in df.columns)
+ and (treatment_col in df.columns)
+ or treatment_effect_col in df.columns
+ )
df = df.copy()
np.random.seed(random_seed)
random_cols = []
for i in range(10):
- random_col = '__random_{}__'.format(i)
+ random_col = "__random_{}__".format(i)
df[random_col] = np.random.rand(df.shape[0])
random_cols.append(random_col)
- model_names = [x for x in df.columns if x not in [outcome_col, treatment_col,
- treatment_effect_col]]
+ model_names = [
+ x
+ for x in df.columns
+ if x not in [outcome_col, treatment_col, treatment_effect_col]
+ ]
qini = []
for i, col in enumerate(model_names):
df = df.sort_values(col, ascending=False).reset_index(drop=True)
df.index = df.index + 1
- df['cumsum_tr'] = df[treatment_col].cumsum()
+ df["cumsum_tr"] = df[treatment_col].cumsum()
if treatment_effect_col in df.columns:
# When treatment_effect_col is given, use it to calculate the average treatment effects
# of cumulative population.
- l = df[treatment_effect_col].cumsum() / df.index * df['cumsum_tr']
+ l = df[treatment_effect_col].cumsum() / df.index * df["cumsum_tr"]
else:
# When treatment_effect_col is not given, use outcome_col and treatment_col
# to calculate the average treatment_effects of cumulative population.
- df['cumsum_ct'] = df.index.values - df['cumsum_tr']
- df['cumsum_y_tr'] = (df[outcome_col] * df[treatment_col]).cumsum()
- df['cumsum_y_ct'] = (df[outcome_col] * (1 - df[treatment_col])).cumsum()
+ df["cumsum_ct"] = df.index.values - df["cumsum_tr"]
+ df["cumsum_y_tr"] = (df[outcome_col] * df[treatment_col]).cumsum()
+ df["cumsum_y_ct"] = (df[outcome_col] * (1 - df[treatment_col])).cumsum()
- l = df['cumsum_y_tr'] - df['cumsum_y_ct'] * df['cumsum_tr'] / df['cumsum_ct']
+ l = (
+ df["cumsum_y_tr"]
+ - df["cumsum_y_ct"] * df["cumsum_tr"] / df["cumsum_ct"]
+ )
qini.append(l)
- qini = pd.concat(qini, join='inner', axis=1)
- qini.loc[0] = np.zeros((qini.shape[1], ))
+ qini = pd.concat(qini, join="inner", axis=1)
+ qini.loc[0] = np.zeros((qini.shape[1],))
qini = qini.sort_index().interpolate()
qini.columns = model_names
@@ -231,9 +271,18 @@ def get_qini(df, outcome_col='y', treatment_col='w', treatment_effect_col='tau',
return qini
-def get_tmlegain(df, inference_col, learner=LGBMRegressor(num_leaves=64, learning_rate=.05, n_estimators=300),
- outcome_col='y', treatment_col='w', p_col='p', n_segment=5, cv=None,
- calibrate_propensity=True, ci=False):
+def get_tmlegain(
+ df,
+ inference_col,
+ learner=LGBMRegressor(num_leaves=64, learning_rate=0.05, n_estimators=300),
+ outcome_col="y",
+ treatment_col="w",
+ p_col="p",
+ n_segment=5,
+ cv=None,
+ calibrate_propensity=True,
+ ci=False,
+):
"""Get TMLE based average uplifts of model estimates of segments.
Args:
@@ -250,46 +299,62 @@ def get_tmlegain(df, inference_col, learner=LGBMRegressor(num_leaves=64, learnin
Returns:
(pandas.DataFrame): cumulative gains of model estimates based on TMLE
"""
- assert ((outcome_col in df.columns) and (treatment_col in df.columns) or
- p_col in df.columns)
+ assert (
+ (outcome_col in df.columns)
+ and (treatment_col in df.columns)
+ or p_col in df.columns
+ )
inference_col = [x for x in inference_col if x in df.columns]
# Initialize TMLE
tmle = TMLELearner(learner, cv=cv, calibrate_propensity=calibrate_propensity)
- ate_all, ate_all_lb, ate_all_ub = tmle.estimate_ate(X=df[inference_col],
- p=df[p_col],
- treatment=df[treatment_col],
- y=df[outcome_col])
+ ate_all, ate_all_lb, ate_all_ub = tmle.estimate_ate(
+ X=df[inference_col], p=df[p_col], treatment=df[treatment_col], y=df[outcome_col]
+ )
df = df.copy()
- model_names = [x for x in df.columns if x not in [outcome_col, treatment_col, p_col] + inference_col]
+ model_names = [
+ x
+ for x in df.columns
+ if x not in [outcome_col, treatment_col, p_col] + inference_col
+ ]
lift = []
lift_lb = []
lift_ub = []
for col in model_names:
- ate_model, ate_model_lb, ate_model_ub = tmle.estimate_ate(X=df[inference_col],
- p=df[p_col],
- treatment=df[treatment_col],
- y=df[outcome_col],
- segment=pd.qcut(df[col], n_segment, labels=False))
- lift_model = [0.] * (n_segment + 1)
+ ate_model, ate_model_lb, ate_model_ub = tmle.estimate_ate(
+ X=df[inference_col],
+ p=df[p_col],
+ treatment=df[treatment_col],
+ y=df[outcome_col],
+ segment=pd.qcut(df[col], n_segment, labels=False),
+ )
+ lift_model = [0.0] * (n_segment + 1)
lift_model[n_segment] = ate_all[0]
for i in range(1, n_segment):
- lift_model[i] = ate_model[0][n_segment - i] * (1/n_segment) + lift_model[i - 1]
+ lift_model[i] = (
+ ate_model[0][n_segment - i] * (1 / n_segment) + lift_model[i - 1]
+ )
lift.append(lift_model)
if ci:
- lift_lb_model = [0.] * (n_segment + 1)
+ lift_lb_model = [0.0] * (n_segment + 1)
lift_lb_model[n_segment] = ate_all_lb[0]
- lift_ub_model = [0.] * (n_segment + 1)
+ lift_ub_model = [0.0] * (n_segment + 1)
lift_ub_model[n_segment] = ate_all_ub[0]
for i in range(1, n_segment):
- lift_lb_model[i] = ate_model_lb[0][n_segment - i] * (1/n_segment) + lift_lb_model[i - 1]
- lift_ub_model[i] = ate_model_ub[0][n_segment - i] * (1/n_segment) + lift_ub_model[i - 1]
+ lift_lb_model[i] = (
+ ate_model_lb[0][n_segment - i] * (1 / n_segment)
+ + lift_lb_model[i - 1]
+ )
+ lift_ub_model[i] = (
+ ate_model_ub[0][n_segment - i] * (1 / n_segment)
+ + lift_ub_model[i - 1]
+ )
lift_lb.append(lift_lb_model)
lift_ub.append(lift_ub_model)
@@ -305,15 +370,25 @@ def get_tmlegain(df, inference_col, learner=LGBMRegressor(num_leaves=64, learnin
lift_ub.columns = [x + " UB" for x in model_names]
lift = pd.concat([lift, lift_lb, lift_ub], axis=1)
- lift.index = lift.index/n_segment
- lift[RANDOM_COL] = np.linspace(0, 1, n_segment + 1)*ate_all[0]
+ lift.index = lift.index / n_segment
+ lift[RANDOM_COL] = np.linspace(0, 1, n_segment + 1) * ate_all[0]
return lift
-def get_tmleqini(df, inference_col, learner=LGBMRegressor(num_leaves=64, learning_rate=.05, n_estimators=300),
- outcome_col='y', treatment_col='w', p_col='p', n_segment=5, cv=None,
- calibrate_propensity=True, ci=False, normalize=False):
+def get_tmleqini(
+ df,
+ inference_col,
+ learner=LGBMRegressor(num_leaves=64, learning_rate=0.05, n_estimators=300),
+ outcome_col="y",
+ treatment_col="w",
+ p_col="p",
+ n_segment=5,
+ cv=None,
+ calibrate_propensity=True,
+ ci=False,
+ normalize=False,
+):
"""Get TMLE based Qini of model estimates by segments.
Args:
@@ -330,35 +405,45 @@ def get_tmleqini(df, inference_col, learner=LGBMRegressor(num_leaves=64, learnin
Returns:
(pandas.DataFrame): cumulative gains of model estimates based of TMLE
"""
- assert ((outcome_col in df.columns) and (treatment_col in df.columns) or
- p_col in df.columns)
+ assert (
+ (outcome_col in df.columns)
+ and (treatment_col in df.columns)
+ or p_col in df.columns
+ )
inference_col = [x for x in inference_col if x in df.columns]
# Initialize TMLE
tmle = TMLELearner(learner, cv=cv, calibrate_propensity=calibrate_propensity)
- ate_all, ate_all_lb, ate_all_ub = tmle.estimate_ate(X=df[inference_col],
- p=df[p_col],
- treatment=df[treatment_col],
- y=df[outcome_col])
+ ate_all, ate_all_lb, ate_all_ub = tmle.estimate_ate(
+ X=df[inference_col], p=df[p_col], treatment=df[treatment_col], y=df[outcome_col]
+ )
df = df.copy()
- model_names = [x for x in df.columns if x not in [outcome_col, treatment_col, p_col] + inference_col]
+ model_names = [
+ x
+ for x in df.columns
+ if x not in [outcome_col, treatment_col, p_col] + inference_col
+ ]
qini = []
qini_lb = []
qini_ub = []
for col in model_names:
- ate_model, ate_model_lb, ate_model_ub = tmle.estimate_ate(X=df[inference_col],
- p=df[p_col],
- treatment=df[treatment_col],
- y=df[outcome_col],
- segment=pd.qcut(df[col], n_segment, labels=False))
+ ate_model, ate_model_lb, ate_model_ub = tmle.estimate_ate(
+ X=df[inference_col],
+ p=df[p_col],
+ treatment=df[treatment_col],
+ y=df[outcome_col],
+ segment=pd.qcut(df[col], n_segment, labels=False),
+ )
qini_model = [0]
for i in range(1, n_segment):
- n_tr = df[pd.qcut(df[col], n_segment, labels=False) == (n_segment - i)][treatment_col].sum()
+ n_tr = df[pd.qcut(df[col], n_segment, labels=False) == (n_segment - i)][
+ treatment_col
+ ].sum()
qini_model.append(ate_model[0][n_segment - i] * n_tr)
qini.append(qini_model)
@@ -367,7 +452,9 @@ def get_tmleqini(df, inference_col, learner=LGBMRegressor(num_leaves=64, learnin
qini_lb_model = [0]
qini_ub_model = [0]
for i in range(1, n_segment):
- n_tr = df[pd.qcut(df[col], n_segment, labels=False) == (n_segment - i)][treatment_col].sum()
+ n_tr = df[pd.qcut(df[col], n_segment, labels=False) == (n_segment - i)][
+ treatment_col
+ ].sum()
qini_lb_model.append(ate_model_lb[0][n_segment - i] * n_tr)
qini_ub_model.append(ate_model_ub[0][n_segment - i] * n_tr)
@@ -387,14 +474,24 @@ def get_tmleqini(df, inference_col, learner=LGBMRegressor(num_leaves=64, learnin
qini = qini.cumsum()
qini.loc[n_segment] = ate_all[0] * df[treatment_col].sum()
- qini[RANDOM_COL] = np.linspace(0, 1, n_segment + 1) * ate_all[0] * df[treatment_col].sum()
+ qini[RANDOM_COL] = (
+ np.linspace(0, 1, n_segment + 1) * ate_all[0] * df[treatment_col].sum()
+ )
qini.index = np.linspace(0, 1, n_segment + 1) * df.shape[0]
return qini
-def plot_gain(df, outcome_col='y', treatment_col='w', treatment_effect_col='tau',
- normalize=False, random_seed=42, n=100, figsize=(8, 8)):
+def plot_gain(
+ df,
+ outcome_col="y",
+ treatment_col="w",
+ treatment_effect_col="tau",
+ normalize=False,
+ random_seed=42,
+ n=100,
+ figsize=(8, 8),
+):
"""Plot the cumulative gain chart (or uplift curve) of model estimates.
If the true treatment effect is provided (e.g. in synthetic data), it's calculated
@@ -418,12 +515,28 @@ def plot_gain(df, outcome_col='y', treatment_col='w', treatment_effect_col='tau'
n (int, optional): the number of samples to be used for plotting
"""
- plot(df, kind='gain', n=n, figsize=figsize, outcome_col=outcome_col, treatment_col=treatment_col,
- treatment_effect_col=treatment_effect_col, normalize=normalize, random_seed=random_seed)
-
-
-def plot_lift(df, outcome_col='y', treatment_col='w', treatment_effect_col='tau',
- random_seed=42, n=100, figsize=(8, 8)):
+ plot(
+ df,
+ kind="gain",
+ n=n,
+ figsize=figsize,
+ outcome_col=outcome_col,
+ treatment_col=treatment_col,
+ treatment_effect_col=treatment_effect_col,
+ normalize=normalize,
+ random_seed=random_seed,
+ )
+
+
+def plot_lift(
+ df,
+ outcome_col="y",
+ treatment_col="w",
+ treatment_effect_col="tau",
+ random_seed=42,
+ n=100,
+ figsize=(8, 8),
+):
"""Plot the lift chart of model estimates in cumulative population.
If the true treatment effect is provided (e.g. in synthetic data), it's calculated
@@ -446,12 +559,28 @@ def plot_lift(df, outcome_col='y', treatment_col='w', treatment_effect_col='tau'
n (int, optional): the number of samples to be used for plotting
"""
- plot(df, kind='lift', n=n, figsize=figsize, outcome_col=outcome_col, treatment_col=treatment_col,
- treatment_effect_col=treatment_effect_col, random_seed=random_seed)
-
-
-def plot_qini(df, outcome_col='y', treatment_col='w', treatment_effect_col='tau',
- normalize=False, random_seed=42, n=100, figsize=(8, 8)):
+ plot(
+ df,
+ kind="lift",
+ n=n,
+ figsize=figsize,
+ outcome_col=outcome_col,
+ treatment_col=treatment_col,
+ treatment_effect_col=treatment_effect_col,
+ random_seed=random_seed,
+ )
+
+
+def plot_qini(
+ df,
+ outcome_col="y",
+ treatment_col="w",
+ treatment_effect_col="tau",
+ normalize=False,
+ random_seed=42,
+ n=100,
+ figsize=(8, 8),
+):
"""Plot the Qini chart (or uplift curve) of model estimates.
If the true treatment effect is provided (e.g. in synthetic data), it's calculated
@@ -476,13 +605,32 @@ def plot_qini(df, outcome_col='y', treatment_col='w', treatment_effect_col='tau'
ci (bool, optional): whether to return confidence intervals for ATE or not
"""
- plot(df, kind='qini', n=n, figsize=figsize, outcome_col=outcome_col, treatment_col=treatment_col,
- treatment_effect_col=treatment_effect_col, normalize=normalize, random_seed=random_seed)
-
-
-def plot_tmlegain(df, inference_col, learner=LGBMRegressor(num_leaves=64, learning_rate=.05, n_estimators=300),
- outcome_col='y', treatment_col='w', p_col='tau', n_segment=5, cv=None,
- calibrate_propensity=True, ci=False, figsize=(8, 8)):
+ plot(
+ df,
+ kind="qini",
+ n=n,
+ figsize=figsize,
+ outcome_col=outcome_col,
+ treatment_col=treatment_col,
+ treatment_effect_col=treatment_effect_col,
+ normalize=normalize,
+ random_seed=random_seed,
+ )
+
+
+def plot_tmlegain(
+ df,
+ inference_col,
+ learner=LGBMRegressor(num_leaves=64, learning_rate=0.05, n_estimators=300),
+ outcome_col="y",
+ treatment_col="w",
+ p_col="tau",
+ n_segment=5,
+ cv=None,
+ calibrate_propensity=True,
+ ci=False,
+ figsize=(8, 8),
+):
"""Plot the lift chart based of TMLE estimation
Args:
@@ -497,9 +645,18 @@ def plot_tmlegain(df, inference_col, learner=LGBMRegressor(num_leaves=64, learni
calibrate_propensity (bool, optional): whether to calibrate propensity score or not
ci (bool, optional): whether to return confidence intervals for ATE or not
"""
- plot_df = get_tmlegain(df, learner=learner, inference_col=inference_col, outcome_col=outcome_col,
- treatment_col=treatment_col, p_col=p_col, n_segment=n_segment, cv=cv,
- calibrate_propensity=calibrate_propensity, ci=ci)
+ plot_df = get_tmlegain(
+ df,
+ learner=learner,
+ inference_col=inference_col,
+ outcome_col=outcome_col,
+ treatment_col=treatment_col,
+ p_col=p_col,
+ n_segment=n_segment,
+ cv=cv,
+ calibrate_propensity=calibrate_propensity,
+ ci=ci,
+ )
if ci:
model_names = [x.replace(" LB", "") for x in plot_df.columns]
model_names = list(set([x.replace(" UB", "") for x in model_names]))
@@ -512,9 +669,15 @@ def plot_tmlegain(df, inference_col, learner=LGBMRegressor(num_leaves=64, learni
lb_col = col + " LB"
up_col = col + " UB"
- if col != 'Random':
+ if col != "Random":
ax.plot(plot_df.index, plot_df[col], color=cmap(cindex))
- ax.fill_between(plot_df.index, plot_df[lb_col], plot_df[up_col], color=cmap(cindex), alpha=0.25)
+ ax.fill_between(
+ plot_df.index,
+ plot_df[lb_col],
+ plot_df[up_col],
+ color=cmap(cindex),
+ alpha=0.25,
+ )
else:
ax.plot(plot_df.index, plot_df[col], color=cmap(cindex))
cindex += 1
@@ -523,14 +686,24 @@ def plot_tmlegain(df, inference_col, learner=LGBMRegressor(num_leaves=64, learni
else:
plot_df.plot(figsize=figsize)
- plt.xlabel('Population')
- plt.ylabel('Gain')
+ plt.xlabel("Population")
+ plt.ylabel("Gain")
plt.show()
-def plot_tmleqini(df, inference_col, learner=LGBMRegressor(num_leaves=64, learning_rate=.05, n_estimators=300),
- outcome_col='y', treatment_col='w', p_col='tau', n_segment=5, cv=None,
- calibrate_propensity=True, ci=False, figsize=(8, 8)):
+def plot_tmleqini(
+ df,
+ inference_col,
+ learner=LGBMRegressor(num_leaves=64, learning_rate=0.05, n_estimators=300),
+ outcome_col="y",
+ treatment_col="w",
+ p_col="tau",
+ n_segment=5,
+ cv=None,
+ calibrate_propensity=True,
+ ci=False,
+ figsize=(8, 8),
+):
"""Plot the qini chart based of TMLE estimation
Args:
@@ -545,9 +718,18 @@ def plot_tmleqini(df, inference_col, learner=LGBMRegressor(num_leaves=64, learni
calibrate_propensity (bool, optional): whether to calibrate propensity score or not
ci (bool, optional): whether to return confidence intervals for ATE or not
"""
- plot_df = get_tmleqini(df, learner=learner, inference_col=inference_col, outcome_col=outcome_col,
- treatment_col=treatment_col, p_col=p_col, n_segment=n_segment, cv=cv,
- calibrate_propensity=calibrate_propensity, ci=ci)
+ plot_df = get_tmleqini(
+ df,
+ learner=learner,
+ inference_col=inference_col,
+ outcome_col=outcome_col,
+ treatment_col=treatment_col,
+ p_col=p_col,
+ n_segment=n_segment,
+ cv=cv,
+ calibrate_propensity=calibrate_propensity,
+ ci=ci,
+ )
if ci:
model_names = [x.replace(" LB", "") for x in plot_df.columns]
model_names = list(set([x.replace(" UB", "") for x in model_names]))
@@ -560,9 +742,15 @@ def plot_tmleqini(df, inference_col, learner=LGBMRegressor(num_leaves=64, learni
lb_col = col + " LB"
up_col = col + " UB"
- if col != 'Random':
+ if col != "Random":
ax.plot(plot_df.index, plot_df[col], color=cmap(cindex))
- ax.fill_between(plot_df.index, plot_df[lb_col], plot_df[up_col], color=cmap(cindex), alpha=0.25)
+ ax.fill_between(
+ plot_df.index,
+ plot_df[lb_col],
+ plot_df[up_col],
+ color=cmap(cindex),
+ alpha=0.25,
+ )
else:
ax.plot(plot_df.index, plot_df[col], color=cmap(cindex))
cindex += 1
@@ -571,13 +759,21 @@ def plot_tmleqini(df, inference_col, learner=LGBMRegressor(num_leaves=64, learni
else:
plot_df.plot(figsize=figsize)
- plt.xlabel('Population')
- plt.ylabel('Qini')
+ plt.xlabel("Population")
+ plt.ylabel("Qini")
plt.show()
-def auuc_score(df, outcome_col='y', treatment_col='w', treatment_effect_col='tau', normalize=True,
- tmle=False, *args, **kwarg):
+def auuc_score(
+ df,
+ outcome_col="y",
+ treatment_col="w",
+ treatment_effect_col="tau",
+ normalize=True,
+ tmle=False,
+ *args,
+ **kwarg
+):
"""Calculate the AUUC (Area Under the Uplift Curve) score.
Args:
@@ -592,14 +788,26 @@ def auuc_score(df, outcome_col='y', treatment_col='w', treatment_effect_col='tau
"""
if not tmle:
- cumgain = get_cumgain(df, outcome_col, treatment_col, treatment_effect_col, normalize)
+ cumgain = get_cumgain(
+ df, outcome_col, treatment_col, treatment_effect_col, normalize
+ )
else:
- cumgain = get_tmlegain(df, outcome_col=outcome_col, treatment_col=treatment_col, *args, **kwarg)
+ cumgain = get_tmlegain(
+ df, outcome_col=outcome_col, treatment_col=treatment_col, *args, **kwarg
+ )
return cumgain.sum() / cumgain.shape[0]
-def qini_score(df, outcome_col='y', treatment_col='w', treatment_effect_col='tau', normalize=True,
- tmle=False, *args, **kwarg):
+def qini_score(
+ df,
+ outcome_col="y",
+ treatment_col="w",
+ treatment_effect_col="tau",
+ normalize=True,
+ tmle=False,
+ *args,
+ **kwarg
+):
"""Calculate the Qini score: the area between the Qini curves of a model and random.
For details, see Radcliffe (2007), `Using Control Group to Target on Predicted Lift:
@@ -619,11 +827,13 @@ def qini_score(df, outcome_col='y', treatment_col='w', treatment_effect_col='tau
if not tmle:
qini = get_qini(df, outcome_col, treatment_col, treatment_effect_col, normalize)
else:
- qini = get_tmleqini(df, outcome_col=outcome_col, treatment_col=treatment_col, *args, **kwarg)
+ qini = get_tmleqini(
+ df, outcome_col=outcome_col, treatment_col=treatment_col, *args, **kwarg
+ )
return (qini.sum(axis=0) - qini[RANDOM_COL].sum()) / qini.shape[0]
-def plot_ps_diagnostics(df, covariate_col, treatment_col='w', p_col='p'):
+def plot_ps_diagnostics(df, covariate_col, treatment_col="w", p_col="p"):
"""Plot covariate balances (standardized differences between the treatment and the control)
before and after weighting the sample using the inverse probability of treatment weights.
@@ -645,40 +855,43 @@ def plot_ps_diagnostics(df, covariate_col, treatment_col='w', p_col='p'):
diffs_post = get_std_diffs(X, W, IPTW, weighted=True)
num_unbal_post = (np.abs(diffs_post) > 0.1).sum()[0]
- diff_plot = _plot_std_diffs(diffs_pre,
- num_unbal_pre,
- diffs_post,
- num_unbal_post)
+ diff_plot = _plot_std_diffs(diffs_pre, num_unbal_pre, diffs_post, num_unbal_post)
return diff_plot
def _plot_std_diffs(diffs_pre, num_unbal_pre, diffs_post, num_unbal_post):
- fig, (ax1, ax2) = plt.subplots(
- 1, 2, figsize=(15, 10), sharex=True, sharey=True)
+ fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 10), sharex=True, sharey=True)
- color = '#EA2566'
+ color = "#EA2566"
sns.stripplot(diffs_pre.iloc[:, 0], diffs_pre.index, ax=ax1)
- ax1.set_xlabel("Before. Number of unbalanced covariates: {num_unbal}".format(
- num_unbal=num_unbal_pre), fontsize=14)
- ax1.axvline(x=-0.1, ymin=0, ymax=1, color=color, linestyle='--')
- ax1.axvline(x=0.1, ymin=0, ymax=1, color=color, linestyle='--')
+ ax1.set_xlabel(
+ "Before. Number of unbalanced covariates: {num_unbal}".format(
+ num_unbal=num_unbal_pre
+ ),
+ fontsize=14,
+ )
+ ax1.axvline(x=-0.1, ymin=0, ymax=1, color=color, linestyle="--")
+ ax1.axvline(x=0.1, ymin=0, ymax=1, color=color, linestyle="--")
sns.stripplot(diffs_post.iloc[:, 0], diffs_post.index, ax=ax2)
- ax2.set_xlabel("After. Number of unbalanced covariates: {num_unbal}".format(
- num_unbal=num_unbal_post), fontsize=14)
- ax2.axvline(x=-0.1, ymin=0, ymax=1, color=color, linestyle='--')
- ax2.axvline(x=0.1, ymin=0, ymax=1, color=color, linestyle='--')
+ ax2.set_xlabel(
+ "After. Number of unbalanced covariates: {num_unbal}".format(
+ num_unbal=num_unbal_post
+ ),
+ fontsize=14,
+ )
+ ax2.axvline(x=-0.1, ymin=0, ymax=1, color=color, linestyle="--")
+ ax2.axvline(x=0.1, ymin=0, ymax=1, color=color, linestyle="--")
- fig.suptitle('Standardized differences in means', fontsize=16)
+ fig.suptitle("Standardized differences in means", fontsize=16)
return fig
def get_simple_iptw(W, propensity_score):
- IPTW = (W / propensity_score) + \
- (1 - W) / (1 - propensity_score)
+ IPTW = (W / propensity_score) + (1 - W) / (1 - propensity_score)
return IPTW
@@ -694,10 +907,11 @@ def get_std_diffs(X, W, weight=None, weighted=False, numeric_threshold=5):
if len(cols) == 0:
raise ValueError(
- "No variable passed the test for continuous or binary variables.")
+ "No variable passed the test for continuous or binary variables."
+ )
- treat = (W == 1)
- contr = (W == 0)
+ treat = W == 1
+ contr = W == 0
X_1 = X.loc[treat, cols]
X_0 = X.loc[contr, cols]
@@ -709,33 +923,38 @@ def get_std_diffs(X, W, weight=None, weighted=False, numeric_threshold=5):
std_diffs_prop = np.empty(sum(prop_index))
if weighted:
- assert weight is not None, 'weight should be provided when weighting is set to "True"'
+ assert (
+ weight is not None
+ ), 'weight should be provided when weighting is set to "True"'
weight_1 = weight[treat]
weight_0 = weight[contr]
X_1_mean, X_1_var = np.apply_along_axis(
- lambda x: _get_wmean_wvar(x, weight_1), 0, X_1)
+ lambda x: _get_wmean_wvar(x, weight_1), 0, X_1
+ )
X_0_mean, X_0_var = np.apply_along_axis(
- lambda x: _get_wmean_wvar(x, weight_0), 0, X_0)
+ lambda x: _get_wmean_wvar(x, weight_0), 0, X_0
+ )
elif not weighted:
- X_1_mean, X_1_var = np.apply_along_axis(
- lambda x: _get_mean_var(x), 0, X_1)
- X_0_mean, X_0_var = np.apply_along_axis(
- lambda x: _get_mean_var(x), 0, X_0)
+ X_1_mean, X_1_var = np.apply_along_axis(lambda x: _get_mean_var(x), 0, X_1)
+ X_0_mean, X_0_var = np.apply_along_axis(lambda x: _get_mean_var(x), 0, X_0)
X_1_mean_cont, X_1_var_cont = X_1_mean[cont_index], X_1_var[cont_index]
X_0_mean_cont, X_0_var_cont = X_0_mean[cont_index], X_0_var[cont_index]
- std_diffs_cont = ((X_1_mean_cont - X_0_mean_cont) /
- np.sqrt((X_1_var_cont + X_0_var_cont) / 2))
+ std_diffs_cont = (X_1_mean_cont - X_0_mean_cont) / np.sqrt(
+ (X_1_var_cont + X_0_var_cont) / 2
+ )
X_1_mean_prop = X_1_mean[prop_index]
X_0_mean_prop = X_0_mean[prop_index]
- std_diffs_prop = ((X_1_mean_prop - X_0_mean_prop) /
- np.sqrt(((X_1_mean_prop * (1 - X_1_mean_prop)) + (X_0_mean_prop * (1 - X_0_mean_prop))) / 2))
+ std_diffs_prop = (X_1_mean_prop - X_0_mean_prop) / np.sqrt(
+ ((X_1_mean_prop * (1 - X_1_mean_prop)) + (X_0_mean_prop * (1 - X_0_mean_prop)))
+ / 2
+ )
std_diffs = np.concatenate([std_diffs_cont, std_diffs_prop], axis=0)
std_diffs_df = pd.DataFrame(std_diffs, index=cols)
@@ -749,11 +968,12 @@ def _get_numeric_vars(X, threshold=5):
is set to 5 by default.
"""
- cont = [(not hasattr(X.iloc[:, i], 'cat')) and (
- X.iloc[:, i].nunique() >= threshold) for i in range(X.shape[1])]
+ cont = [
+ (not hasattr(X.iloc[:, i], "cat")) and (X.iloc[:, i].nunique() >= threshold)
+ for i in range(X.shape[1])
+ ]
- prop = [X.iloc[:, i].nunique(
- ) == 2 for i in range(X.shape[1])]
+ prop = [X.iloc[:, i].nunique() == 2 for i in range(X.shape[1])]
cont_cols = list(X.loc[:, cont].columns)
prop_cols = list(X.loc[:, prop].columns)
@@ -761,15 +981,18 @@ def _get_numeric_vars(X, threshold=5):
dropped = set(X.columns) - set(cont_cols + prop_cols)
if dropped:
- logger.info('Some non-binary variables were dropped because they had fewer than {} unique values or were of the \
- dtype "cat". The dropped variables are: {}'.format(threshold, dropped))
+ logger.info(
+ 'Some non-binary variables were dropped because they had fewer than {} unique values or were of the \
+ dtype "cat". The dropped variables are: {}'.format(
+ threshold, dropped
+ )
+ )
return cont_cols, prop_cols
def _get_mean_var(X):
- """Calculate the mean and variance of a variable.
- """
+ """Calculate the mean and variance of a variable."""
mean = X.mean()
var = X.var()
@@ -777,7 +1000,7 @@ def _get_mean_var(X):
def _get_wmean_wvar(X, weight):
- '''
+ """
Calculate the weighted mean of a variable given an arbitrary
sample weight. Formulas from:
@@ -786,9 +1009,10 @@ def _get_wmean_wvar(X, weight):
Using the Propensity Score to Estimate Causal Treatment Effects in
Observational Studies.
Statistics in Medicine 34 (28): 3661–79. https://doi.org/10.1002/sim.6607.
- '''
+ """
weighted_mean = np.sum(weight * X) / np.sum(weight)
- weighted_var = (np.sum(weight) / (np.power(np.sum(weight), 2) - np.sum(
- np.power(weight, 2)))) * (np.sum(weight * np.power((X - weighted_mean), 2)))
+ weighted_var = (
+ np.sum(weight) / (np.power(np.sum(weight), 2) - np.sum(np.power(weight, 2)))
+ ) * (np.sum(weight * np.power((X - weighted_mean), 2)))
return [weighted_mean, weighted_var]
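
The hunks above only re-wrap the metric helpers (`get_cumgain`, `get_qini`, `auuc_score`, `qini_score`, the `plot_*` wrappers); their signatures are unchanged. For reviewers who want to spot-check that behaviour is preserved, here is a minimal usage sketch of the expected input frame and calls — the `causalml.metrics` import path and the synthetic data are assumptions for illustration, not part of this diff.

```python
import numpy as np
import pandas as pd

# Assumed public import path; the functions themselves are the ones reformatted above.
from causalml.metrics import auuc_score, qini_score, plot_gain

rng = np.random.default_rng(42)
n = 1000

# One row per unit: outcome `y`, treatment flag `w`, (synthetic) true effect `tau`,
# plus one column per model whose uplift estimates should be evaluated.
df = pd.DataFrame(
    {
        "y": rng.binomial(1, 0.3, n),
        "w": rng.binomial(1, 0.5, n),
        "tau": rng.normal(0.05, 0.01, n),
        "model_a": rng.normal(size=n),
        "model_b": rng.normal(size=n),
    }
)

# AUUC and Qini per model column (a random baseline is added internally).
print(auuc_score(df, outcome_col="y", treatment_col="w", treatment_effect_col="tau"))
print(qini_score(df, outcome_col="y", treatment_col="w", treatment_effect_col="tau"))

# Cumulative gain chart for the same frame.
plot_gain(df, outcome_col="y", treatment_col="w", treatment_effect_col="tau")
```
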
diff --git a/causalml/optimize/unit_selection.py b/causalml/optimize/unit_selection.py
index 0c01a4bc..bc0fc526 100644
--- a/causalml/optimize/unit_selection.py
+++ b/causalml/optimize/unit_selection.py
@@ -7,7 +7,7 @@
class CounterfactualUnitSelector:
- '''
+ """
A highly experimental implementation of the counterfactual unit selection
model proposed by Li and Pearl (2019).
@@ -51,10 +51,17 @@ class CounterfactualUnitSelector:
----------
Li, Ang, and Judea Pearl. 2019. “Unit Selection Based on Counterfactual
Logic.” https://ftp.cs.ucla.edu/pub/stat_ser/r488.pdf.
- '''
-
- def __init__(self, learner, nevertaker_payoff, alwaystaker_payoff,
- complier_payoff, defier_payoff, organic_conversion=None):
+ """
+
+ def __init__(
+ self,
+ learner,
+ nevertaker_payoff,
+ alwaystaker_payoff,
+ complier_payoff,
+ defier_payoff,
+ organic_conversion=None,
+ ):
self.learner = learner
self.nevertaker_payoff = nevertaker_payoff
@@ -64,9 +71,9 @@ def __init__(self, learner, nevertaker_payoff, alwaystaker_payoff,
self.organic_conversion = organic_conversion
def fit(self, data, treatment, outcome):
- '''
+ """
Fits the class.
- '''
+ """
if self._gain_equality_check():
@@ -78,10 +85,10 @@ def fit(self, data, treatment, outcome):
self._fit_condprob_models(data, treatment, outcome)
def predict(self, data, treatment, outcome):
- '''
+ """
Predicts an individual-level payoff. If gain equality is satisfied, uses
the exact function; if not, uses the midpoint between bounds.
- '''
+ """
if self._gain_equality_check():
@@ -94,17 +101,19 @@ def predict(self, data, treatment, outcome):
return est_payoff
def _gain_equality_check(self):
- '''
+ """
Checks if gain equality is satisfied. If so, the optimization task can
be simplified.
- '''
+ """
- return self.complier_payoff + self.defier_payoff == \
- self.alwaystaker_payoff + self.nevertaker_payoff
+ return (
+ self.complier_payoff + self.defier_payoff
+ == self.alwaystaker_payoff + self.nevertaker_payoff
+ )
@staticmethod
def _make_segments(data, treatment, outcome):
- '''
+ """
Constructs the following segments:
* AC = Pr(Y = 1, W = 1 \mid X)
@@ -114,22 +123,22 @@ def _make_segments(data, treatment, outcome):
where the names of the outcomes correspond to the combinations of
the relevant segments, e.g. AC = Always-taker or Complier.
- '''
+ """
- segments = np.empty(data.shape[0], dtype='object')
+ segments = np.empty(data.shape[0], dtype="object")
- segments[(data[treatment] == 1) & (data[outcome] == 1)] = 'AC'
- segments[(data[treatment] == 0) & (data[outcome] == 1)] = 'AD'
- segments[(data[treatment] == 1) & (data[outcome] == 0)] = 'ND'
- segments[(data[treatment] == 0) & (data[outcome] == 0)] = 'NC'
+ segments[(data[treatment] == 1) & (data[outcome] == 1)] = "AC"
+ segments[(data[treatment] == 0) & (data[outcome] == 1)] = "AD"
+ segments[(data[treatment] == 1) & (data[outcome] == 0)] = "ND"
+ segments[(data[treatment] == 0) & (data[outcome] == 0)] = "NC"
return segments
def _fit_segment_model(self, data, treatment, outcome):
- '''
+ """
Fits a classifier for estimating the probabilities for the unit
- segment combinations.
- '''
+ segment combinations.
+ """
model = clone(self.learner)
@@ -139,10 +148,10 @@ def _fit_segment_model(self, data, treatment, outcome):
self.segment_model = model.fit(X, y)
def _fit_condprob_models(self, data, treatment, outcome):
- '''
+ """
Fits two classifiers to estimate conversion probabilities conditional
on the treatment.
- '''
+ """
trt_learner = clone(self.learner)
ctr_learner = clone(self.learner)
@@ -150,16 +159,16 @@ def _fit_condprob_models(self, data, treatment, outcome):
treated = data[treatment] == 1
X = data.drop([treatment, outcome], axis=1)
- y = data['outcome']
+ y = data["outcome"]
self.trt_model = trt_learner.fit(X[treated], y[treated])
self.ctr_model = ctr_learner.fit(X[~treated], y[~treated])
def _get_exact_benefit(self, data, treatment, outcome):
- '''
+ """
Calculates the exact benefit function of Theorem 4 in Li and Pearl (2019).
Returns the exact benefit.
- '''
+ """
beta = self.complier_payoff
gamma = self.alwaystaker_payoff
theta = self.nevertaker_payoff
@@ -169,13 +178,16 @@ def _get_exact_benefit(self, data, treatment, outcome):
segment_prob = self.segment_model.predict_proba(X)
segment_name = self.segment_model.classes_
- benefit = (beta - theta) * segment_prob[:, segment_name == 'AC'] + \
- (gamma - beta) * segment_prob[:, segment_name == 'AD'] + theta
+ benefit = (
+ (beta - theta) * segment_prob[:, segment_name == "AC"]
+ + (gamma - beta) * segment_prob[:, segment_name == "AD"]
+ + theta
+ )
return benefit
def _obj_func_midp(self, data, treatment, outcome):
- '''
+ """
Calculates bounds for the objective function. Returns the midpoint
between bounds.
@@ -209,7 +221,7 @@ def _obj_func_midp(self, data, treatment, outcome):
pr_y_x : float
Organic probability of conversion.
- '''
+ """
X = data.drop([treatment, outcome], axis=1)
@@ -218,18 +230,20 @@ def _obj_func_midp(self, data, treatment, outcome):
theta = self.nevertaker_payoff
delta = self.defier_payoff
- pr_y0_w1, pr_y1_w1 = np.split(self.trt_model.predict_proba(X),
- indices_or_sections=2, axis=1)
- pr_y0_w0, pr_y1_w0 = np.split(self.ctr_model.predict_proba(X),
- indices_or_sections=2, axis=1)
+ pr_y0_w1, pr_y1_w1 = np.split(
+ self.trt_model.predict_proba(X), indices_or_sections=2, axis=1
+ )
+ pr_y0_w0, pr_y1_w0 = np.split(
+ self.ctr_model.predict_proba(X), indices_or_sections=2, axis=1
+ )
segment_prob = self.segment_model.predict_proba(X)
segment_name = self.segment_model.classes_
- pr_y1w1_x = segment_prob[:, segment_name == 'AC']
- pr_y0w0_x = segment_prob[:, segment_name == 'NC']
- pr_y1w0_x = segment_prob[:, segment_name == 'AD']
- pr_y0w1_x = segment_prob[:, segment_name == 'ND']
+ pr_y1w1_x = segment_prob[:, segment_name == "AC"]
+ pr_y0w0_x = segment_prob[:, segment_name == "NC"]
+ pr_y1w0_x = segment_prob[:, segment_name == "AD"]
+ pr_y0w1_x = segment_prob[:, segment_name == "ND"]
if self.organic_conversion is not None:
@@ -239,22 +253,41 @@ def _obj_func_midp(self, data, treatment, outcome):
pr_y_x = pr_y1_w0
warnings.warn(
- 'Probability of organic conversion estimated from control observations.')
+ "Probability of organic conversion estimated from control observations."
+ )
p1 = (beta - theta) * pr_y1_w1 + delta * pr_y1_w0 + theta * pr_y0_w0
p2 = gamma * pr_y1_w1 + delta * pr_y0_w1 + (beta - gamma) * pr_y0_w0
- p3 = (gamma - delta) * pr_y1_w1 + delta * pr_y1_w0 + theta * \
- pr_y0_w0 + (beta - gamma - theta + delta) * (pr_y1w1_x + pr_y0w0_x)
- p4 = (beta - theta) * pr_y1_w1 - (beta - gamma - theta) * pr_y1_w0 + \
- theta * pr_y0_w0 + (beta - gamma - theta + delta) * \
- (pr_y1w0_x + pr_y0w1_x)
+ p3 = (
+ (gamma - delta) * pr_y1_w1
+ + delta * pr_y1_w0
+ + theta * pr_y0_w0
+ + (beta - gamma - theta + delta) * (pr_y1w1_x + pr_y0w0_x)
+ )
+ p4 = (
+ (beta - theta) * pr_y1_w1
+ - (beta - gamma - theta) * pr_y1_w0
+ + theta * pr_y0_w0
+ + (beta - gamma - theta + delta) * (pr_y1w0_x + pr_y0w1_x)
+ )
p5 = (gamma - delta) * pr_y1_w1 + delta * pr_y1_w0 + theta * pr_y0_w0
- p6 = (beta - theta) * pr_y1_w1 - (beta - gamma - theta) * pr_y1_w0 + \
- theta * pr_y0_w0
- p7 = (gamma - delta) * pr_y1_w1 - (beta - gamma - theta) * pr_y1_w0 + \
- theta * pr_y0_w0 + (beta - gamma - theta + delta) * pr_y_x
- p8 = (beta - theta) * pr_y1_w1 + delta * pr_y1_w0 + theta * \
- pr_y0_w0 - (beta - gamma - theta + delta) * pr_y_x
+ p6 = (
+ (beta - theta) * pr_y1_w1
+ - (beta - gamma - theta) * pr_y1_w0
+ + theta * pr_y0_w0
+ )
+ p7 = (
+ (gamma - delta) * pr_y1_w1
+ - (beta - gamma - theta) * pr_y1_w0
+ + theta * pr_y0_w0
+ + (beta - gamma - theta + delta) * pr_y_x
+ )
+ p8 = (
+ (beta - theta) * pr_y1_w1
+ + delta * pr_y1_w0
+ + theta * pr_y0_w0
+ - (beta - gamma - theta + delta) * pr_y_x
+ )
params_1 = np.concatenate((p1, p2, p3, p4), axis=1)
params_2 = np.concatenate((p5, p6, p7, p8), axis=1)
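
Within `CounterfactualUnitSelector`, the `_gain_equality_check` and benefit-bound hunks above are pure re-wraps of the same expressions from Li and Pearl (2019). As a reading aid, the gain-equality condition spelled out on its own looks like this (payoff numbers invented for illustration):

```python
# Gain equality from Li and Pearl (2019): beta + delta == gamma + theta, where
# beta, gamma, theta, delta are the complier, always-taker, never-taker, defier payoffs.
complier_payoff = 20.0     # beta   (example values only)
alwaystaker_payoff = 10.0  # gamma
nevertaker_payoff = 5.0    # theta
defier_payoff = -5.0       # delta

gain_equality = (
    complier_payoff + defier_payoff == alwaystaker_payoff + nevertaker_payoff
)

# When this holds, the selector uses the exact benefit function of Theorem 4;
# otherwise it falls back to the midpoint between the objective-function bounds.
print(gain_equality)  # True for these example payoffs
```
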
diff --git a/causalml/optimize/utils.py b/causalml/optimize/utils.py
index 8ec1e89c..5c610b89 100644
--- a/causalml/optimize/utils.py
+++ b/causalml/optimize/utils.py
@@ -2,7 +2,7 @@
def get_treatment_costs(treatment, control_name, cc_dict, ic_dict):
- '''
+ """
Set the conversion and impression costs based on a dict of parameters.
Calculate the actual cost of targeting a user with the actual treatment
@@ -32,7 +32,7 @@ def get_treatment_costs(treatment, control_name, cc_dict, ic_dict):
conditions : list, len = len(set(treatment))
A list of experimental conditions.
- '''
+ """
# Set the conversion costs of the treatments
conversion_cost = np.zeros((len(treatment), len(cc_dict.keys())))
@@ -53,9 +53,15 @@ def get_treatment_costs(treatment, control_name, cc_dict, ic_dict):
return conversion_cost, impression_cost, conditions_sorted
-def get_actual_value(treatment, observed_outcome, conversion_value,
- conditions, conversion_cost, impression_cost):
- '''
+def get_actual_value(
+ treatment,
+ observed_outcome,
+ conversion_value,
+ conditions,
+ conversion_cost,
+ impression_cost,
+):
+ """
Set the conversion and impression costs based on a dict of parameters.
Calculate the actual value of targeting a user with the actual treatment group
@@ -88,11 +94,13 @@ def get_actual_value(treatment, observed_outcome, conversion_value,
conversion_value : array, shape = (num_samples, )
Array of payoffs from converting a user.
- '''
+ """
- cost_filter = [actual_group == possible_group
- for actual_group in treatment
- for possible_group in conditions]
+ cost_filter = [
+ actual_group == possible_group
+ for actual_group in treatment
+ for possible_group in conditions
+ ]
conversion_cost_flat = conversion_cost.flatten()
actual_cc = conversion_cost_flat[cost_filter]
@@ -100,14 +108,13 @@ def get_actual_value(treatment, observed_outcome, conversion_value,
actual_ic = impression_cost_flat[cost_filter]
# Calculate the actual value of having a user in their actual treatment
- actual_value = (conversion_value - actual_cc) * \
- observed_outcome - actual_ic
+ actual_value = (conversion_value - actual_cc) * observed_outcome - actual_ic
return actual_value
def get_uplift_best(cate, conditions):
- '''
+ """
Takes the CATE prediction from a learner, adds the control
outcome array and finds the name of the argmax condition.
@@ -122,7 +129,7 @@ def get_uplift_best(cate, conditions):
-------
uplift_recomm_name : array, shape = (num_samples, )
The experimental group recommended by the learner.
- '''
+ """
cate_with_control = np.c_[np.zeros(cate.shape[0]), cate]
uplift_best_idx = np.argmax(cate_with_control, axis=1)
uplift_best_name = [conditions[idx] for idx in uplift_best_idx]
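
The three helpers in `causalml/optimize/utils.py` keep their signatures; only the wrapping changes. A short sketch of how they compose end to end (toy data invented for illustration; only the import path of the diffed module is assumed):

```python
import numpy as np
from causalml.optimize.utils import (
    get_treatment_costs,
    get_actual_value,
    get_uplift_best,
)

# Toy experiment: two groups, observed binary conversions, flat $20 conversion value.
treatment = np.array(["control", "treatment", "treatment", "control"])
outcome = np.array([0, 1, 0, 1])
conversion_value = np.full(treatment.shape[0], 20)

cc_array, ic_array, conditions = get_treatment_costs(
    treatment=treatment,
    control_name="control",
    cc_dict={"control": 0, "treatment": 2.5},  # per-conversion cost by group
    ic_dict={"control": 0, "treatment": 0},    # per-impression cost by group
)

# Realized value of each unit under the group it actually received.
actual_value = get_actual_value(
    treatment=treatment,
    observed_outcome=outcome,
    conversion_value=conversion_value,
    conditions=conditions,
    conversion_cost=cc_array,
    impression_cost=ic_array,
)

# Recommend the argmax condition from a (made-up) one-column CATE matrix.
cate = np.array([[0.10], [-0.02], [0.05], [0.00]])
print(get_uplift_best(cate, conditions))
print(actual_value)
```
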
diff --git a/causalml/optimize/value_optimization.py b/causalml/optimize/value_optimization.py
index 4ef60bb9..a9ac8791 100644
--- a/causalml/optimize/value_optimization.py
+++ b/causalml/optimize/value_optimization.py
@@ -2,7 +2,7 @@
class CounterfactualValueEstimator:
- '''
+ """
Args
----
treatment : array, shape = (num_samples, )
@@ -42,10 +42,21 @@ class CounterfactualValueEstimator:
to the control outcome to obtain y_proba under each condition. These
outcomes are counterfactual because just one of them is actually
observed.
- '''
-
- def __init__(self, treatment, control_name, treatment_names, y_proba,
- cate, value, conversion_cost, impression_cost, *args, **kwargs):
+ """
+
+ def __init__(
+ self,
+ treatment,
+ control_name,
+ treatment_names,
+ y_proba,
+ cate,
+ value,
+ conversion_cost,
+ impression_cost,
+ *args,
+ **kwargs
+ ):
self.treatment = treatment
self.control_name = control_name
@@ -57,47 +68,50 @@ def __init__(self, treatment, control_name, treatment_names, y_proba,
self.impression_cost = impression_cost
def predict_best(self):
- '''
+ """
Predict the best treatment group based on the highest counterfactual
value for a treatment.
- '''
+ """
self._get_counterfactuals()
self._get_counterfactual_values()
return self.best_treatment
def predict_counterfactuals(self):
- '''
+ """
Predict the counterfactual values for each treatment group.
- '''
+ """
self._get_counterfactuals()
self._get_counterfactual_values()
return self.expected_values
def _get_counterfactuals(self):
- '''
+ """
Get an array of counterfactual outcomes based on control outcome and
the array of conditional average treatment effects.
- '''
+ """
conditions = self.treatment_names.copy()
conditions.insert(0, self.control_name)
cates_with_control = np.c_[np.zeros(self.cate.shape[0]), self.cate]
cates_flat = cates_with_control.flatten()
- cates_filt = [actual_group == poss_group
- for actual_group in self.treatment
- for poss_group in conditions]
+ cates_filt = [
+ actual_group == poss_group
+ for actual_group in self.treatment
+ for poss_group in conditions
+ ]
control_outcome = self.y_proba - cates_flat[cates_filt]
self.counterfactuals = cates_with_control + control_outcome[:, None]
def _get_counterfactual_values(self):
- '''
+ """
Calculate the expected value of assigning a unit to each of the
treatment conditions given the value of conversion and the conversion
and impression costs associated with the treatment.
- '''
+ """
- self.expected_values = ((self.value[:, None] - self.conversion_cost) *
- self.counterfactuals - self.impression_cost)
+ self.expected_values = (
+ self.value[:, None] - self.conversion_cost
+ ) * self.counterfactuals - self.impression_cost
self.best_treatment = np.argmax(self.expected_values, axis=1)
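
`CounterfactualValueEstimator` is likewise only re-wrapped. A toy sketch of how its array arguments line up (all shapes and numbers invented for illustration):

```python
import numpy as np
from causalml.optimize.value_optimization import CounterfactualValueEstimator

n = 4
treatment = np.array(["control", "treatment1", "treatment2", "control"])

cve = CounterfactualValueEstimator(
    treatment=treatment,
    control_name="control",
    treatment_names=["treatment1", "treatment2"],
    # Predicted conversion probability under the condition each unit actually received.
    y_proba=np.array([0.10, 0.35, 0.20, 0.15]),
    # One CATE column per treatment (relative to control).
    cate=np.array(
        [[0.05, 0.02], [0.10, 0.01], [0.00, 0.08], [0.03, 0.03]]
    ),
    value=np.full(n, 20.0),            # payoff per conversion
    conversion_cost=np.zeros((n, 3)),  # cost matrix: control + two treatments
    impression_cost=np.zeros((n, 3)),
)

print(cve.predict_best())             # index of the highest-value condition per unit
print(cve.predict_counterfactuals())  # expected value of every condition per unit
```
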
diff --git a/causalml/propensity.py b/causalml/propensity.py
index f0fc6acc..9dd7c563 100644
--- a/causalml/propensity.py
+++ b/causalml/propensity.py
@@ -8,7 +8,7 @@
import xgboost as xgb
-logger = logging.getLogger('causalml')
+logger = logging.getLogger("causalml")
class PropensityModel(metaclass=ABCMeta):
@@ -51,9 +51,7 @@ def predict(self, X):
Returns:
(numpy.ndarray): Propensity scores between 0 and 1.
"""
- return np.clip(
- self.model.predict_proba(X)[:, 1], *self.clip_bounds
- )
+ return np.clip(self.model.predict_proba(X)[:, 1], *self.clip_bounds)
def fit_predict(self, X, y):
"""
@@ -68,7 +66,7 @@ def fit_predict(self, X, y):
"""
self.fit(X, y)
propensity_scores = self.predict(X)
- logger.info('AUC score: {:.6f}'.format(auc(y, propensity_scores)))
+ logger.info("AUC score: {:.6f}".format(auc(y, propensity_scores)))
return propensity_scores
@@ -80,16 +78,18 @@ class LogisticRegressionPropensityModel(PropensityModel):
@property
def _model(self):
kwargs = {
- 'penalty': 'elasticnet',
- 'solver': 'saga',
- 'Cs': np.logspace(1e-3, 1 - 1e-3, 4),
- 'l1_ratios': np.linspace(1e-3, 1 - 1e-3, 4),
- 'cv': StratifiedKFold(
- n_splits=self.model_kwargs.pop('n_fold') if 'n_fold' in self.model_kwargs else 4,
+ "penalty": "elasticnet",
+ "solver": "saga",
+ "Cs": np.logspace(1e-3, 1 - 1e-3, 4),
+ "l1_ratios": np.linspace(1e-3, 1 - 1e-3, 4),
+ "cv": StratifiedKFold(
+ n_splits=self.model_kwargs.pop("n_fold")
+ if "n_fold" in self.model_kwargs
+ else 4,
shuffle=True,
- random_state=self.model_kwargs.get('random_state', 42)
+ random_state=self.model_kwargs.get("random_state", 42),
),
- 'random_state': 42,
+ "random_state": 42,
}
kwargs.update(self.model_kwargs)
@@ -110,25 +110,22 @@ class GradientBoostedPropensityModel(PropensityModel):
https://xgboost.readthedocs.io/en/latest/python/python_api.html
"""
- def __init__(
- self,
- early_stop=False,
- clip_bounds=(1e-3, 1 - 1e-3),
- **model_kwargs
- ):
- super(GradientBoostedPropensityModel, self).__init__(clip_bounds, **model_kwargs)
+ def __init__(self, early_stop=False, clip_bounds=(1e-3, 1 - 1e-3), **model_kwargs):
+ super(GradientBoostedPropensityModel, self).__init__(
+ clip_bounds, **model_kwargs
+ )
self.early_stop = early_stop
@property
def _model(self):
kwargs = {
- 'max_depth': 8,
- 'learning_rate': 0.1,
- 'n_estimators': 100,
- 'objective': 'binary:logistic',
- 'nthread': -1,
- 'colsample_bytree': 0.8,
- 'random_state': 42,
+ "max_depth": 8,
+ "learning_rate": 0.1,
+ "n_estimators": 100,
+ "objective": "binary:logistic",
+ "nthread": -1,
+ "colsample_bytree": 0.8,
+ "random_state": 42,
}
kwargs.update(self.model_kwargs)
@@ -152,7 +149,7 @@ def fit(self, X, y, early_stopping_rounds=10, stop_val_size=0.2):
X_train,
y_train,
eval_set=[(X_val, y_val)],
- early_stopping_rounds=early_stopping_rounds
+ early_stopping_rounds=early_stopping_rounds,
)
else:
super(GradientBoostedPropensityModel, self).fit(X, y)
@@ -169,10 +166,9 @@ def predict(self, X):
"""
if self.early_stop:
return np.clip(
- self.model.predict_proba(
- X,
- ntree_limit=self.model.best_ntree_limit
- )[:, 1],
+ self.model.predict_proba(X, ntree_limit=self.model.best_ntree_limit)[
+ :, 1
+ ],
*self.clip_bounds
)
else:
@@ -197,7 +193,9 @@ def calibrate(ps, treatment):
return gam.predict_proba(ps)
-def compute_propensity_score(X, treatment, p_model=None, X_pred=None, treatment_pred=None, calibrate_p=True):
+def compute_propensity_score(
+ X, treatment, p_model=None, X_pred=None, treatment_pred=None, calibrate_p=True
+):
"""Generate propensity score if user didn't provide
Args:
@@ -227,12 +225,12 @@ def compute_propensity_score(X, treatment, p_model=None, X_pred=None, treatment_
p = p_model.predict(X_pred)
if calibrate_p:
- logger.info('Calibrating propensity scores.')
+ logger.info("Calibrating propensity scores.")
p = calibrate(p, treatment_pred)
# force the p values within the range
eps = np.finfo(float).eps
- p = np.where(p < 0 + eps, 0 + eps*1.001, p)
- p = np.where(p > 1 - eps, 1 - eps*1.001, p)
+ p = np.where(p < 0 + eps, 0 + eps * 1.001, p)
+ p = np.where(p > 1 - eps, 1 - eps * 1.001, p)
return p, p_model
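
The propensity hunks are formatting-only as well; `fit_predict` and `compute_propensity_score` keep their behaviour. A short round-trip sketch (synthetic data invented for illustration; assumes the usual xgboost/pygam dependencies are installed):

```python
import numpy as np
from causalml.propensity import GradientBoostedPropensityModel, compute_propensity_score

rng = np.random.default_rng(0)
X = rng.normal(size=(500, 5))
treatment = rng.binomial(1, 0.5, 500)

# XGBoost-backed propensity model; scores are clipped to the (1e-3, 1 - 1e-3) bounds.
pm = GradientBoostedPropensityModel(early_stop=False)
p = pm.fit_predict(X, treatment)

# Or let the helper fit a default model, calibrate, and clip the scores in one call.
p, p_model = compute_propensity_score(X, treatment, calibrate_p=True)
print(p.min(), p.max())
```
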
diff --git a/docs/conf.py b/docs/conf.py
index caf814a2..cb6ce65f 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -15,13 +15,14 @@
import sys
import os
import matplotlib
-matplotlib.use('agg')
+
+matplotlib.use("agg")
# If extensions (or modules to document with autodoc) are in another
# directory, add these directories to sys.path here. If the directory is
# relative to the documentation root, use os.path.abspath to make it
# absolute, like shown here.
-#sys.path.insert(0, os.path.abspath('.'))
+# sys.path.insert(0, os.path.abspath('.'))
# Get the project root dir, which is the parent dir of this
# cwd = os.getcwd()
@@ -38,39 +39,39 @@
# -- General configuration ---------------------------------------------
# If your documentation needs a minimal Sphinx version, state it here.
-#needs_sphinx = '1.0'
+# needs_sphinx = '1.0'
# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
extensions = [
- 'sphinx.ext.autodoc',
- 'sphinx.ext.napoleon',
- 'sphinx.ext.doctest',
- 'sphinx.ext.mathjax',
- 'sphinx.ext.viewcode',
- 'sphinx.ext.autosectionlabel',
- 'sphinxcontrib.bibtex'
+ "sphinx.ext.autodoc",
+ "sphinx.ext.napoleon",
+ "sphinx.ext.doctest",
+ "sphinx.ext.mathjax",
+ "sphinx.ext.viewcode",
+ "sphinx.ext.autosectionlabel",
+ "sphinxcontrib.bibtex",
]
autodoc_mock_imports = ["_tkinter"]
# Add any paths that contain templates here, relative to this directory.
-templates_path = ['_templates']
+templates_path = ["_templates"]
# The suffix of source filenames.
-source_suffix = '.rst'
+source_suffix = ".rst"
# The encoding of source files.
-#source_encoding = 'utf-8-sig'
+# source_encoding = 'utf-8-sig'
# The master toctree document.
-master_doc = 'index'
+master_doc = "index"
# General information about the project.
-project = u'causalml'
-copyright = u'2019 Uber Technologies, Inc.'
-author = 'CausalML'
+project = "causalml"
+copyright = "2019 Uber Technologies, Inc."
+author = "CausalML"
# The version info for the project you're documenting, acts as replacement
# for |version| and |release|, also used in various other places throughout
@@ -83,42 +84,38 @@
# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
-#language = None
+# language = None
# There are two options for replacing |today|: either, you set today to
# some non-false value, then it is used:
-#today = ''
+# today = ''
# Else, today_fmt is used as the format for a strftime call.
-#today_fmt = '%B %d, %Y'
+# today_fmt = '%B %d, %Y'
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
-exclude_patterns = [
- '_build',
- '*processor*',
- 'causalml.batch'
-]
+exclude_patterns = ["_build", "*processor*", "causalml.batch"]
# The reST default role (used for this markup: `text`) to use for all
# documents.
-#default_role = None
+# default_role = None
# If true, '()' will be appended to :func: etc. cross-reference text.
-#add_function_parentheses = True
+# add_function_parentheses = True
# If true, the current module name will be prepended to all description
# unit titles (such as .. function::).
-#add_module_names = True
+# add_module_names = True
# If true, sectionauthor and moduleauthor directives will be shown in the
# output. They are ignored by default.
-#show_authors = False
+# show_authors = False
# The name of the Pygments (syntax highlighting) style to use.
-pygments_style = 'sphinx'
+pygments_style = "sphinx"
# A list of ignored prefixes for module index sorting.
-#modindex_common_prefix = []
+# modindex_common_prefix = []
# If true, keep warnings as "system message" paragraphs in the built
# documents.
@@ -129,87 +126,84 @@
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
-html_theme = 'sphinx_rtd_theme'
+html_theme = "sphinx_rtd_theme"
# Theme options are theme-specific and customize the look and feel of a
# theme further. For a list of options available for each theme, see the
# documentation.
-html_theme_options = {
- 'logo_only': False,
- 'display_version': True
-}
+html_theme_options = {"logo_only": False, "display_version": True}
# Add any paths that contain custom themes here, relative to this directory.
html_theme_path = [sphinx_rtd_theme.get_html_theme_path()]
# The name for this set of Sphinx documents. If None, it defaults to
# " v documentation".
-#html_title = None
+# html_title = None
# A shorter title for the navigation bar. Default is the same as
# html_title.
-#html_short_title = None
+# html_short_title = None
# The name of an image file (relative to this directory) to place at the
# top of the sidebar.
-html_logo = '_static/img/logo/causalml_logo_square_transparent.png'
+html_logo = "_static/img/logo/causalml_logo_square_transparent.png"
# The name of an image file (within the static path) to use as favicon
# of the docs. This file should be a Windows icon file (.ico) being
# 16x16 or 32x32 pixels large.
-html_favicon = '_static/img/logo/favicon.ico'
+html_favicon = "_static/img/logo/favicon.ico"
# Add any paths that contain custom static files (such as style sheets)
# here, relative to this directory. They are copied after the builtin
# static files, so a file named "default.css" will overwrite the builtin
# "default.css".
-html_static_path = ['_static']
+html_static_path = ["_static"]
# If not '', a 'Last updated on:' timestamp is inserted at every page
# bottom, using the given strftime format.
-#html_last_updated_fmt = '%b %d, %Y'
+# html_last_updated_fmt = '%b %d, %Y'
# If true, SmartyPants will be used to convert quotes and dashes to
# typographically correct entities.
-#html_use_smartypants = True
+# html_use_smartypants = True
# Custom sidebar templates, maps document names to template names.
-#html_sidebars = {}
+# html_sidebars = {}
# Additional templates that should be rendered to pages, maps page names
# to template names.
-#html_additional_pages = {}
+# html_additional_pages = {}
# If false, no module index is generated.
-#html_domain_indices = True
+# html_domain_indices = True
# If false, no index is generated.
-#html_use_index = True
+# html_use_index = True
# If true, the index is split into individual pages for each letter.
-#html_split_index = False
+# html_split_index = False
# If true, links to the reST sources are added to the pages.
-#html_show_sourcelink = True
+# html_show_sourcelink = True
# If true, "Created using Sphinx" is shown in the HTML footer.
# Default is True.
-#html_show_sphinx = True
+# html_show_sphinx = True
# If true, "(C) Copyright ..." is shown in the HTML footer.
# Default is True.
-#html_show_copyright = True
+# html_show_copyright = True
# If true, an OpenSearch description file will be output, and all pages
# will contain a tag referring to it. The value of this option
# must be the base URL from which the finished HTML is served.
-#html_use_opensearch = ''
+# html_use_opensearch = ''
# This is the file name suffix for HTML files (e.g. ".xhtml").
-#html_file_suffix = None
+# html_file_suffix = None
# Output file base name for HTML help builder.
-htmlhelp_basename = 'causalml_doc'
+htmlhelp_basename = "causalml_doc"
# -- Options for LaTeX output ------------------------------------------
@@ -217,10 +211,8 @@
latex_elements = {
# The paper size ('letterpaper' or 'a4paper').
#'papersize': 'letterpaper',
-
# The font size ('10pt', '11pt' or '12pt').
#'pointsize': '10pt',
-
# Additional stuff for the LaTeX preamble.
#'preamble': '',
}
@@ -229,44 +221,38 @@
# (source start file, target name, title, author, documentclass
# [howto/manual]).
latex_documents = [
- ('index', 'causalml.tex',
- u'causalml Documentation',
- u'Someone at Uber', 'manual'),
+ ("index", "causalml.tex", "causalml Documentation", "Someone at Uber", "manual"),
]
# The name of an image file (relative to this directory) to place at
# the top of the title page.
-#latex_logo = None
+# latex_logo = None
# For "manual" documents, if this is true, then toplevel headings
# are parts, not chapters.
-#latex_use_parts = False
+# latex_use_parts = False
# If true, show page references after internal links.
-#latex_show_pagerefs = False
+# latex_show_pagerefs = False
# If true, show URL addresses after external links.
-#latex_show_urls = False
+# latex_show_urls = False
# Documents to append as an appendix to all manuals.
-#latex_appendices = []
+# latex_appendices = []
# If false, no module index is generated.
-#latex_domain_indices = True
+# latex_domain_indices = True
# -- Options for manual page output ------------------------------------
# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
-man_pages = [
- ('index', 'causalml',
- u'causalml Documentation',
- [author], 1)
-]
+man_pages = [("index", "causalml", "causalml Documentation", [author], 1)]
# If true, show URL addresses after external links.
-#man_show_urls = False
+# man_show_urls = False
# -- Options for Texinfo output ----------------------------------------
@@ -275,25 +261,28 @@
# (source start file, target name, title, author,
# dir menu entry, description, category)
texinfo_documents = [
- ('index', 'causalml',
- u'causalml Documentation',
- author,
- 'causalml',
- 'One line description of project.',
- 'Miscellaneous'),
+ (
+ "index",
+ "causalml",
+ "causalml Documentation",
+ author,
+ "causalml",
+ "One line description of project.",
+ "Miscellaneous",
+ ),
]
# Documents to append as an appendix to all manuals.
-#texinfo_appendices = []
+# texinfo_appendices = []
# If false, no module index is generated.
-#texinfo_domain_indices = True
+# texinfo_domain_indices = True
# How to display URL addresses: 'footnote', 'no', or 'inline'.
-#texinfo_show_urls = 'footnote'
+# texinfo_show_urls = 'footnote'
# If true, do not generate a @detailmenu in the "Top" node's menu.
-#texinfo_no_detailmenu = False
+# texinfo_no_detailmenu = False
numpydoc_show_class_members = True
class_members_toctree = False
diff --git a/setup.py b/setup.py
index 8a1ec533..0500956d 100644
--- a/setup.py
+++ b/setup.py
@@ -1,16 +1,18 @@
from setuptools import dist, setup, find_packages
from setuptools.extension import Extension
+
try:
from Cython.Build import cythonize
except ImportError:
- dist.Distribution().fetch_build_eggs(['cython>=0.28.0'])
+ dist.Distribution().fetch_build_eggs(["cython>=0.28.0"])
from Cython.Build import cythonize
import Cython.Compiler.Options
+
Cython.Compiler.Options.annotate = True
try:
from numpy import get_include as np_get_include
except ImportError:
- dist.Distribution().fetch_build_eggs(['numpy'])
+ dist.Distribution().fetch_build_eggs(["numpy"])
from numpy import get_include as np_get_include
import causalml
@@ -24,16 +26,20 @@
requirements = f.readlines()
extensions = [
- Extension("causalml.inference.tree.causaltree",
- ["causalml/inference/tree/causaltree.pyx"],
- libraries=[],
- include_dirs=[np_get_include()],
- extra_compile_args=["-O3"]),
- Extension("causalml.inference.tree.uplift",
- ["causalml/inference/tree/uplift.pyx"],
- libraries=[],
- include_dirs=[np_get_include()],
- extra_compile_args=["-O3"])
+ Extension(
+ "causalml.inference.tree.causaltree",
+ ["causalml/inference/tree/causaltree.pyx"],
+ libraries=[],
+ include_dirs=[np_get_include()],
+ extra_compile_args=["-O3"],
+ ),
+ Extension(
+ "causalml.inference.tree.uplift",
+ ["causalml/inference/tree/uplift.pyx"],
+ libraries=[],
+ include_dirs=[np_get_include()],
+ extra_compile_args=["-O3"],
+ ),
]
packages = find_packages()
@@ -56,15 +62,13 @@
],
setup_requires=[
# Setuptools 18.0 properly handles Cython extensions.
- 'setuptools>=18.0',
- 'cython',
- 'numpy',
- 'scikit-learn>=0.22.0'
+ "setuptools>=18.0",
+ "cython",
+ "numpy",
+ "scikit-learn>=0.22.0",
],
install_requires=requirements,
ext_modules=cythonize(extensions, annotate=True),
include_dirs=[np_get_include()],
- extras_require={
- 'tf': ['tensorflow>=2.4.0']
- }
+ extras_require={"tf": ["tensorflow>=2.4.0"]},
)
diff --git a/tests/conftest.py b/tests/conftest.py
index c681481e..88e91743 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -7,7 +7,7 @@
from .const import RANDOM_SEED, N_SAMPLE, TREATMENT_NAMES, CONVERSION
-@pytest.fixture(scope='module')
+@pytest.fixture(scope="module")
def generate_regression_data():
generated = False
@@ -15,14 +15,14 @@ def generate_regression_data():
def _generate_data():
if not generated:
np.random.seed(RANDOM_SEED)
- data = synthetic_data(mode=1, n=N_SAMPLE, p=8, sigma=.1)
+ data = synthetic_data(mode=1, n=N_SAMPLE, p=8, sigma=0.1)
return data
yield _generate_data
-@pytest.fixture(scope='module')
+@pytest.fixture(scope="module")
def generate_classification_data():
generated = False
@@ -30,16 +30,19 @@ def generate_classification_data():
def _generate_data():
if not generated:
np.random.seed(RANDOM_SEED)
- data = make_uplift_classification(n_samples=N_SAMPLE,
- treatment_name=TREATMENT_NAMES,
- y_name=CONVERSION,
- random_seed=RANDOM_SEED)
+ data = make_uplift_classification(
+ n_samples=N_SAMPLE,
+ treatment_name=TREATMENT_NAMES,
+ y_name=CONVERSION,
+ random_seed=RANDOM_SEED,
+ )
return data
yield _generate_data
-@pytest.fixture(scope='module')
+
+@pytest.fixture(scope="module")
def generate_classification_data_two_treatments():
generated = False
@@ -47,10 +50,12 @@ def generate_classification_data_two_treatments():
def _generate_data():
if not generated:
np.random.seed(RANDOM_SEED)
- data = make_uplift_classification(n_samples=N_SAMPLE,
- treatment_name=TREATMENT_NAMES[0:2],
- y_name=CONVERSION,
- random_seed=RANDOM_SEED)
+ data = make_uplift_classification(
+ n_samples=N_SAMPLE,
+ treatment_name=TREATMENT_NAMES[0:2],
+ y_name=CONVERSION,
+ random_seed=RANDOM_SEED,
+ )
return data
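
Because these fixtures yield a factory rather than the data itself, a test calls the injected function to obtain the cached synthetic dataset. A hypothetical consumer is sketched below (not part of this diff; the unpacking order assumes `synthetic_data`'s usual `y, X, treatment, tau, b, e` return):

```python
# Hypothetical test sketch showing how the module-scoped fixture is consumed.
def test_regression_data_shapes(generate_regression_data):
    y, X, treatment, tau, b, e = generate_regression_data()

    # All per-unit arrays should line up with the number of samples.
    assert X.shape[0] == y.shape[0] == treatment.shape[0] == tau.shape[0]
```
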
diff --git a/tests/const.py b/tests/const.py
index 836239ba..1ea843de 100644
--- a/tests/const.py
+++ b/tests/const.py
@@ -1,13 +1,13 @@
RANDOM_SEED = 42
N_SAMPLE = 1000
-ERROR_THRESHOLD = .5
+ERROR_THRESHOLD = 0.5
NUM_FEATURES = 6
-TREATMENT_COL = 'treatment'
-SCORE_COL = 'score'
-GROUP_COL = 'group'
-OUTCOME_COL = 'outcome'
+TREATMENT_COL = "treatment"
+SCORE_COL = "score"
+GROUP_COL = "group"
+OUTCOME_COL = "outcome"
-CONTROL_NAME = 'control'
-TREATMENT_NAMES = [CONTROL_NAME, 'treatment1', 'treatment2', 'treatment3']
-CONVERSION = 'conversion'
+CONTROL_NAME = "control"
+TREATMENT_NAMES = [CONTROL_NAME, "treatment1", "treatment2", "treatment3"]
+CONVERSION = "conversion"
diff --git a/tests/test_cevae.py b/tests/test_cevae.py
index 26fe7024..82762433 100644
--- a/tests/test_cevae.py
+++ b/tests/test_cevae.py
@@ -7,7 +7,9 @@
def test_CEVAE():
- y, X, treatment, tau, b, e = simulate_hidden_confounder(n=10000, p=5, sigma=1.0, adj=0.)
+ y, X, treatment, tau, b, e = simulate_hidden_confounder(
+ n=10000, p=5, sigma=1.0, adj=0.0
+ )
outcome_dist = "normal"
latent_dim = 20
@@ -17,31 +19,33 @@ def test_CEVAE():
learning_rate = 1e-3
learning_rate_decay = 0.1
- cevae = CEVAE(outcome_dist=outcome_dist,
- latent_dim=latent_dim,
- hidden_dim=hidden_dim,
- num_epochs=num_epochs,
- batch_size=batch_size,
- learning_rate=learning_rate,
- learning_rate_decay=learning_rate_decay)
-
- cevae.fit(X=torch.tensor(X, dtype=torch.float),
- treatment=torch.tensor(treatment, dtype=torch.float),
- y=torch.tensor(y, dtype=torch.float))
+ cevae = CEVAE(
+ outcome_dist=outcome_dist,
+ latent_dim=latent_dim,
+ hidden_dim=hidden_dim,
+ num_epochs=num_epochs,
+ batch_size=batch_size,
+ learning_rate=learning_rate,
+ learning_rate_decay=learning_rate_decay,
+ )
+
+ cevae.fit(
+ X=torch.tensor(X, dtype=torch.float),
+ treatment=torch.tensor(treatment, dtype=torch.float),
+ y=torch.tensor(y, dtype=torch.float),
+ )
# check the accuracy of the ite accuracy
ite = cevae.predict(X).flatten()
- auuc_metrics = pd.DataFrame({'ite': ite,
- 'W': treatment,
- 'y': y,
- 'treatment_effect_col': tau})
+ auuc_metrics = pd.DataFrame(
+ {"ite": ite, "W": treatment, "y": y, "treatment_effect_col": tau}
+ )
- cumgain = get_cumgain(auuc_metrics,
- outcome_col='y',
- treatment_col='W',
- treatment_effect_col='tau')
+ cumgain = get_cumgain(
+ auuc_metrics, outcome_col="y", treatment_col="W", treatment_effect_col="tau"
+ )
# Check if the cumulative gain when using the model's prediction is
# higher than it would be under random targeting
- assert cumgain['ite'].sum() > cumgain['Random'].sum()
+ assert cumgain["ite"].sum() > cumgain["Random"].sum()
diff --git a/tests/test_counterfactual_unit_selection.py b/tests/test_counterfactual_unit_selection.py
index 15d58b49..f4b37106 100644
--- a/tests/test_counterfactual_unit_selection.py
+++ b/tests/test_counterfactual_unit_selection.py
@@ -15,29 +15,35 @@
def test_counterfactual_unit_selection():
df, X_names = make_uplift_classification(
- n_samples=2000, treatment_name=['control', 'treatment'])
- df['treatment_numeric'] = df['treatment_group_key'].replace(
- {'control': 0, 'treatment': 1})
+ n_samples=2000, treatment_name=["control", "treatment"]
+ )
+ df["treatment_numeric"] = df["treatment_group_key"].replace(
+ {"control": 0, "treatment": 1}
+ )
df_train, df_test = train_test_split(df, test_size=0.2, random_state=RANDOM_SEED)
train_idx = df_train.index
test_idx = df_test.index
- conversion_cost_dict = {'control': 0, 'treatment': 2.5}
- impression_cost_dict = {'control': 0, 'treatment': 0}
+ conversion_cost_dict = {"control": 0, "treatment": 2.5}
+ impression_cost_dict = {"control": 0, "treatment": 0}
- cc_array, ic_array, conditions = get_treatment_costs(treatment=df['treatment_group_key'],
- control_name='control',
- cc_dict=conversion_cost_dict,
- ic_dict=impression_cost_dict)
+ cc_array, ic_array, conditions = get_treatment_costs(
+ treatment=df["treatment_group_key"],
+ control_name="control",
+ cc_dict=conversion_cost_dict,
+ ic_dict=impression_cost_dict,
+ )
conversion_value_array = np.full(df.shape[0], 20)
- actual_value = get_actual_value(treatment=df['treatment_group_key'],
- observed_outcome=df['conversion'],
- conversion_value=conversion_value_array,
- conditions=conditions,
- conversion_cost=cc_array,
- impression_cost=ic_array)
+ actual_value = get_actual_value(
+ treatment=df["treatment_group_key"],
+ observed_outcome=df["conversion"],
+ conversion_value=conversion_value_array,
+ conditions=conditions,
+ conversion_cost=cc_array,
+ impression_cost=ic_array,
+ )
random_allocation_value = actual_value.loc[test_idx].mean()
@@ -46,22 +52,28 @@ def test_counterfactual_unit_selection():
complier_payoff = 17.5
defier_payoff = -20
- cus = CounterfactualUnitSelector(learner=LogisticRegressionCV(),
- nevertaker_payoff=nevertaker_payoff,
- alwaystaker_payoff=alwaystaker_payoff,
- complier_payoff=complier_payoff,
- defier_payoff=defier_payoff)
-
- cus.fit(data=df_train.drop('treatment_group_key', 1),
- treatment='treatment_numeric',
- outcome='conversion')
-
- cus_pred = cus.predict(data=df_test.drop('treatment_group_key', 1),
- treatment='treatment_numeric',
- outcome='conversion')
+ cus = CounterfactualUnitSelector(
+ learner=LogisticRegressionCV(),
+ nevertaker_payoff=nevertaker_payoff,
+ alwaystaker_payoff=alwaystaker_payoff,
+ complier_payoff=complier_payoff,
+ defier_payoff=defier_payoff,
+ )
+
+ cus.fit(
+ data=df_train.drop("treatment_group_key", 1),
+ treatment="treatment_numeric",
+ outcome="conversion",
+ )
+
+ cus_pred = cus.predict(
+ data=df_test.drop("treatment_group_key", 1),
+ treatment="treatment_numeric",
+ outcome="conversion",
+ )
best_cus = np.where(cus_pred > 0, 1, 0)
- actual_is_cus = df_test['treatment_numeric'] == best_cus.ravel()
+ actual_is_cus = df_test["treatment_numeric"] == best_cus.ravel()
cus_value = actual_value.loc[test_idx][actual_is_cus].mean()
assert cus_value > random_allocation_value
diff --git a/tests/test_datasets.py b/tests/test_datasets.py
index 16195e9d..5e8d2123 100644
--- a/tests/test_datasets.py
+++ b/tests/test_datasets.py
@@ -1,49 +1,91 @@
import pytest
-from causalml.dataset import simulate_nuisance_and_easy_treatment, simulate_hidden_confounder, simulate_randomized_trial
-from causalml.dataset import get_synthetic_preds, get_synthetic_summary, get_synthetic_auuc
+from causalml.dataset import (
+ simulate_nuisance_and_easy_treatment,
+ simulate_hidden_confounder,
+ simulate_randomized_trial,
+)
+from causalml.dataset import (
+ get_synthetic_preds,
+ get_synthetic_summary,
+ get_synthetic_auuc,
+)
from causalml.dataset import get_synthetic_preds_holdout, get_synthetic_summary_holdout
from causalml.inference.meta import LRSRegressor, XGBTRegressor
-@pytest.mark.parametrize('synthetic_data_func', [simulate_nuisance_and_easy_treatment,
- simulate_hidden_confounder,
- simulate_randomized_trial])
+@pytest.mark.parametrize(
+ "synthetic_data_func",
+ [
+ simulate_nuisance_and_easy_treatment,
+ simulate_hidden_confounder,
+ simulate_randomized_trial,
+ ],
+)
def test_get_synthetic_preds(synthetic_data_func):
- preds_dict = get_synthetic_preds(synthetic_data_func=synthetic_data_func,
- n=1000,
- estimators={'S Learner (LR)': LRSRegressor(), 'T Learner (XGB)': XGBTRegressor()})
+ preds_dict = get_synthetic_preds(
+ synthetic_data_func=synthetic_data_func,
+ n=1000,
+ estimators={
+ "S Learner (LR)": LRSRegressor(),
+ "T Learner (XGB)": XGBTRegressor(),
+ },
+ )
- assert preds_dict['S Learner (LR)'].shape[0] == preds_dict['T Learner (XGB)'].shape[0]
+ assert (
+ preds_dict["S Learner (LR)"].shape[0] == preds_dict["T Learner (XGB)"].shape[0]
+ )
def test_get_synthetic_summary():
- summary = get_synthetic_summary(synthetic_data_func=simulate_nuisance_and_easy_treatment,
- estimators={'S Learner (LR)': LRSRegressor(), 'T Learner (XGB)': XGBTRegressor()})
+ summary = get_synthetic_summary(
+ synthetic_data_func=simulate_nuisance_and_easy_treatment,
+ estimators={
+ "S Learner (LR)": LRSRegressor(),
+ "T Learner (XGB)": XGBTRegressor(),
+ },
+ )
print(summary)
def test_get_synthetic_preds_holdout():
- preds_train, preds_valid = get_synthetic_preds_holdout(synthetic_data_func=simulate_nuisance_and_easy_treatment,
- n=1000,
- estimators={'S Learner (LR)': LRSRegressor(),
- 'T Learner (XGB)': XGBTRegressor()})
-
- assert preds_train['S Learner (LR)'].shape[0] == preds_train['T Learner (XGB)'].shape[0]
- assert preds_valid['S Learner (LR)'].shape[0] == preds_valid['T Learner (XGB)'].shape[0]
+ preds_train, preds_valid = get_synthetic_preds_holdout(
+ synthetic_data_func=simulate_nuisance_and_easy_treatment,
+ n=1000,
+ estimators={
+ "S Learner (LR)": LRSRegressor(),
+ "T Learner (XGB)": XGBTRegressor(),
+ },
+ )
+
+ assert (
+ preds_train["S Learner (LR)"].shape[0]
+ == preds_train["T Learner (XGB)"].shape[0]
+ )
+ assert (
+ preds_valid["S Learner (LR)"].shape[0]
+ == preds_valid["T Learner (XGB)"].shape[0]
+ )
def test_get_synthetic_summary_holdout():
- summary = get_synthetic_summary_holdout(synthetic_data_func=simulate_nuisance_and_easy_treatment)
+ summary = get_synthetic_summary_holdout(
+ synthetic_data_func=simulate_nuisance_and_easy_treatment
+ )
print(summary)
def test_get_synthetic_auuc():
- preds_dict = get_synthetic_preds(synthetic_data_func=simulate_nuisance_and_easy_treatment,
- n=1000,
- estimators={'S Learner (LR)': LRSRegressor(), 'T Learner (XGB)': XGBTRegressor()})
+ preds_dict = get_synthetic_preds(
+ synthetic_data_func=simulate_nuisance_and_easy_treatment,
+ n=1000,
+ estimators={
+ "S Learner (LR)": LRSRegressor(),
+ "T Learner (XGB)": XGBTRegressor(),
+ },
+ )
auuc_df = get_synthetic_auuc(preds_dict, plot=False)
print(auuc_df)
diff --git a/tests/test_features.py b/tests/test_features.py
index da71ed1b..33092a9d 100644
--- a/tests/test_features.py
+++ b/tests/test_features.py
@@ -9,9 +9,13 @@ def generate_categorical_data():
def _generate_data():
if not generated:
- df = pd.DataFrame({'cat1': ['a', 'a', 'b', 'a', 'c', 'b', 'd'],
- 'cat2': ['aa', 'aa', 'aa', 'bb', 'bb', 'bb', 'cc'],
- 'num1': [1, 2, 1, 2, 1, 1, 1]})
+ df = pd.DataFrame(
+ {
+ "cat1": ["a", "a", "b", "a", "c", "b", "d"],
+ "cat2": ["aa", "aa", "aa", "bb", "bb", "bb", "cc"],
+ "num1": [1, 2, 1, 2, 1, 1, 1],
+ }
+ )
return df
@@ -28,7 +32,7 @@ def test_load_data(generate_categorical_data):
def test_LabelEncoder(generate_categorical_data):
df = generate_categorical_data()
- cat_cols = [col for col in df.columns if df[col].dtype == 'object']
+ cat_cols = [col for col in df.columns if df[col].dtype == "object"]
n_category = 0
for col in cat_cols:
n_category += df[col].nunique()
@@ -44,7 +48,7 @@ def test_LabelEncoder(generate_categorical_data):
def test_OneHotEncoder(generate_categorical_data):
df = generate_categorical_data()
- cat_cols = [col for col in df.columns if df[col].dtype == 'object']
+ cat_cols = [col for col in df.columns if df[col].dtype == "object"]
n_category = 0
for col in cat_cols:
n_category += df[col].nunique()
diff --git a/tests/test_ivlearner.py b/tests/test_ivlearner.py
index eaccec28..cbf25d43 100644
--- a/tests/test_ivlearner.py
+++ b/tests/test_ivlearner.py
@@ -11,46 +11,74 @@
from .const import RANDOM_SEED, N_SAMPLE, ERROR_THRESHOLD, CONTROL_NAME, CONVERSION
+
def test_drivlearner():
np.random.seed(RANDOM_SEED)
n = 1000
p = 8
sigma = 1.0
- X = np.random.uniform(size=n*p).reshape((n, -1))
- b = np.sin(np.pi * X[:, 0] * X[:, 1]) + 2 * (X[:, 2] - 0.5) ** 2 + X[:, 3] + 0.5 * X[:, 4]
- assignment = (np.random.uniform(size=n)>0.5).astype(int)
+ X = np.random.uniform(size=n * p).reshape((n, -1))
+ b = (
+ np.sin(np.pi * X[:, 0] * X[:, 1])
+ + 2 * (X[:, 2] - 0.5) ** 2
+ + X[:, 3]
+ + 0.5 * X[:, 4]
+ )
+ assignment = (np.random.uniform(size=n) > 0.5).astype(int)
eta = 0.1
- e_raw = np.maximum(np.repeat(eta, n), np.minimum(np.sin(np.pi * X[:, 0] * X[:, 1]), np.repeat(1-eta, n)))
+ e_raw = np.maximum(
+ np.repeat(eta, n),
+ np.minimum(np.sin(np.pi * X[:, 0] * X[:, 1]), np.repeat(1 - eta, n)),
+ )
e = e_raw.copy()
e[assignment == 0] = 0
tau = (X[:, 0] + X[:, 1]) / 2
- X_obs = X[:, [i for i in range(8) if i!=1]]
+ X_obs = X[:, [i for i in range(8) if i != 1]]
w = np.random.binomial(1, e, size=n)
treatment = w
y = b + (w - 0.5) * tau + sigma * np.random.normal(size=n)
- learner = BaseDRIVLearner(learner=XGBRegressor(), treatment_effect_learner=LinearRegression())
+ learner = BaseDRIVLearner(
+ learner=XGBRegressor(), treatment_effect_learner=LinearRegression()
+ )
# check the accuracy of the ATE estimation
- ate_p, lb, ub = learner.estimate_ate(X=X, assignment=assignment, treatment=treatment, y=y, p=(np.ones(n)*1e-6, e_raw))
+ ate_p, lb, ub = learner.estimate_ate(
+ X=X,
+ assignment=assignment,
+ treatment=treatment,
+ y=y,
+ p=(np.ones(n) * 1e-6, e_raw),
+ )
assert (ate_p >= lb) and (ate_p <= ub)
assert ape(tau.mean(), ate_p) < ERROR_THRESHOLD
# check the accuracy of the CATE estimation with the bootstrap CI
- cate_p, _, _ = learner.fit_predict(X=X, assignment=assignment, treatment=treatment, y=y, p=(np.ones(n)*1e-6, e_raw), return_ci=True, n_bootstraps=10)
+ cate_p, _, _ = learner.fit_predict(
+ X=X,
+ assignment=assignment,
+ treatment=treatment,
+ y=y,
+ p=(np.ones(n) * 1e-6, e_raw),
+ return_ci=True,
+ n_bootstraps=10,
+ )
- auuc_metrics = pd.DataFrame({'cate_p': cate_p.flatten(),
- 'W': treatment,
- 'y': y,
- 'treatment_effect_col': tau})
+ auuc_metrics = pd.DataFrame(
+ {
+ "cate_p": cate_p.flatten(),
+ "W": treatment,
+ "y": y,
+ "treatment_effect_col": tau,
+ }
+ )
- cumgain = get_cumgain(auuc_metrics,
- outcome_col='y',
- treatment_col='W',
- treatment_effect_col='tau')
+ cumgain = get_cumgain(
+ auuc_metrics, outcome_col="y", treatment_col="W", treatment_effect_col="tau"
+ )
# Check if the cumulative gain when using the model's prediction is
# higher than it would be under random targeting
- assert cumgain['cate_p'].sum() > cumgain['Random'].sum()
+ assert cumgain["cate_p"].sum() > cumgain["Random"].sum()
diff --git a/tests/test_match.py b/tests/test_match.py
index 5e6bfe1d..8b8678e6 100644
--- a/tests/test_match.py
+++ b/tests/test_match.py
@@ -16,7 +16,7 @@ def _generate_data():
if not generated:
y, X, treatment, tau, b, e = generate_regression_data()
- features = ['x{}'.format(i) for i in range(X.shape[1])]
+ features = ["x{}".format(i) for i in range(X.shape[1])]
df = pd.DataFrame(X, columns=features)
df[TREATMENT_COL] = treatment
@@ -38,14 +38,14 @@ def _generate_data():
def test_nearest_neighbor_match_by_group(generate_unmatched_data):
df, features = generate_unmatched_data()
- psm = NearestNeighborMatch(replace=False,
- ratio=1.,
- random_state=RANDOM_SEED)
+ psm = NearestNeighborMatch(replace=False, ratio=1.0, random_state=RANDOM_SEED)
- matched = psm.match_by_group(data=df,
- treatment_col=TREATMENT_COL,
- score_cols=[SCORE_COL],
- groupby_col=GROUP_COL)
+ matched = psm.match_by_group(
+ data=df,
+ treatment_col=TREATMENT_COL,
+ score_cols=[SCORE_COL],
+ groupby_col=GROUP_COL,
+ )
assert sum(matched[TREATMENT_COL] == 0) == sum(matched[TREATMENT_COL] != 0)
@@ -53,12 +53,14 @@ def test_nearest_neighbor_match_by_group(generate_unmatched_data):
def test_match_optimizer(generate_unmatched_data):
df, features = generate_unmatched_data()
- optimizer = MatchOptimizer(treatment_col=TREATMENT_COL,
- ps_col=SCORE_COL,
- matching_covariates=[SCORE_COL],
- min_users_per_group=100,
- smd_cols=[SCORE_COL],
- dev_cols_transformations={SCORE_COL: np.mean})
+ optimizer = MatchOptimizer(
+ treatment_col=TREATMENT_COL,
+ ps_col=SCORE_COL,
+ matching_covariates=[SCORE_COL],
+ min_users_per_group=100,
+ smd_cols=[SCORE_COL],
+ dev_cols_transformations={SCORE_COL: np.mean},
+ )
matched = optimizer.search_best_match(df)
diff --git a/tests/test_meta_learners.py b/tests/test_meta_learners.py
index cc85c04d..f9f7122a 100644
--- a/tests/test_meta_learners.py
+++ b/tests/test_meta_learners.py
@@ -9,10 +9,26 @@
from sklearn.ensemble import RandomForestRegressor
from causalml.dataset import synthetic_data
-from causalml.inference.meta import BaseSLearner, BaseSRegressor, BaseSClassifier, LRSRegressor
-from causalml.inference.meta import BaseTLearner, BaseTRegressor, BaseTClassifier, XGBTRegressor, MLPTRegressor
+from causalml.inference.meta import (
+ BaseSLearner,
+ BaseSRegressor,
+ BaseSClassifier,
+ LRSRegressor,
+)
+from causalml.inference.meta import (
+ BaseTLearner,
+ BaseTRegressor,
+ BaseTClassifier,
+ XGBTRegressor,
+ MLPTRegressor,
+)
from causalml.inference.meta import BaseXLearner, BaseXClassifier, BaseXRegressor
-from causalml.inference.meta import BaseRLearner, BaseRClassifier, BaseRRegressor, XGBRRegressor
+from causalml.inference.meta import (
+ BaseRLearner,
+ BaseRClassifier,
+ BaseRRegressor,
+ XGBRRegressor,
+)
from causalml.inference.meta import TMLELearner
from causalml.inference.meta import BaseDRLearner
from causalml.metrics import ape, get_cumgain
@@ -21,29 +37,45 @@
def test_synthetic_data():
- y, X, treatment, tau, b, e = synthetic_data(mode=1, n=N_SAMPLE, p=8, sigma=.1)
-
- assert (y.shape[0] == X.shape[0] and y.shape[0] == treatment.shape[0] and
- y.shape[0] == tau.shape[0] and y.shape[0] == b.shape[0] and
- y.shape[0] == e.shape[0])
-
- y, X, treatment, tau, b, e = synthetic_data(mode=2, n=N_SAMPLE, p=8, sigma=.1)
-
- assert (y.shape[0] == X.shape[0] and y.shape[0] == treatment.shape[0] and
- y.shape[0] == tau.shape[0] and y.shape[0] == b.shape[0] and
- y.shape[0] == e.shape[0])
-
- y, X, treatment, tau, b, e = synthetic_data(mode=3, n=N_SAMPLE, p=8, sigma=.1)
-
- assert (y.shape[0] == X.shape[0] and y.shape[0] == treatment.shape[0] and
- y.shape[0] == tau.shape[0] and y.shape[0] == b.shape[0] and
- y.shape[0] == e.shape[0])
-
- y, X, treatment, tau, b, e = synthetic_data(mode=4, n=N_SAMPLE, p=8, sigma=.1)
-
- assert (y.shape[0] == X.shape[0] and y.shape[0] == treatment.shape[0] and
- y.shape[0] == tau.shape[0] and y.shape[0] == b.shape[0] and
- y.shape[0] == e.shape[0])
+ y, X, treatment, tau, b, e = synthetic_data(mode=1, n=N_SAMPLE, p=8, sigma=0.1)
+
+ assert (
+ y.shape[0] == X.shape[0]
+ and y.shape[0] == treatment.shape[0]
+ and y.shape[0] == tau.shape[0]
+ and y.shape[0] == b.shape[0]
+ and y.shape[0] == e.shape[0]
+ )
+
+ y, X, treatment, tau, b, e = synthetic_data(mode=2, n=N_SAMPLE, p=8, sigma=0.1)
+
+ assert (
+ y.shape[0] == X.shape[0]
+ and y.shape[0] == treatment.shape[0]
+ and y.shape[0] == tau.shape[0]
+ and y.shape[0] == b.shape[0]
+ and y.shape[0] == e.shape[0]
+ )
+
+ y, X, treatment, tau, b, e = synthetic_data(mode=3, n=N_SAMPLE, p=8, sigma=0.1)
+
+ assert (
+ y.shape[0] == X.shape[0]
+ and y.shape[0] == treatment.shape[0]
+ and y.shape[0] == tau.shape[0]
+ and y.shape[0] == b.shape[0]
+ and y.shape[0] == e.shape[0]
+ )
+
+ y, X, treatment, tau, b, e = synthetic_data(mode=4, n=N_SAMPLE, p=8, sigma=0.1)
+
+ assert (
+ y.shape[0] == X.shape[0]
+ and y.shape[0] == treatment.shape[0]
+ and y.shape[0] == tau.shape[0]
+ and y.shape[0] == b.shape[0]
+ and y.shape[0] == e.shape[0]
+ )
def test_BaseSLearner(generate_regression_data):
@@ -63,26 +95,33 @@ def test_BaseSRegressor(generate_regression_data):
learner = BaseSRegressor(learner=XGBRegressor())
# check the accuracy of the ATE estimation
- ate_p, lb, ub = learner.estimate_ate(X=X, treatment=treatment, y=y, return_ci=True, n_bootstraps=10)
+ ate_p, lb, ub = learner.estimate_ate(
+ X=X, treatment=treatment, y=y, return_ci=True, n_bootstraps=10
+ )
assert (ate_p >= lb) and (ate_p <= ub)
assert ape(tau.mean(), ate_p) < ERROR_THRESHOLD
# check the accuracy of the CATE estimation with the bootstrap CI
- cate_p, _, _ = learner.fit_predict(X=X, treatment=treatment, y=y, return_ci=True, n_bootstraps=10)
-
- auuc_metrics = pd.DataFrame({'cate_p': cate_p.flatten(),
- 'W': treatment,
- 'y': y,
- 'treatment_effect_col': tau})
-
- cumgain = get_cumgain(auuc_metrics,
- outcome_col='y',
- treatment_col='W',
- treatment_effect_col='tau')
+ cate_p, _, _ = learner.fit_predict(
+ X=X, treatment=treatment, y=y, return_ci=True, n_bootstraps=10
+ )
+
+ auuc_metrics = pd.DataFrame(
+ {
+ "cate_p": cate_p.flatten(),
+ "W": treatment,
+ "y": y,
+ "treatment_effect_col": tau,
+ }
+ )
+
+ cumgain = get_cumgain(
+ auuc_metrics, outcome_col="y", treatment_col="W", treatment_effect_col="tau"
+ )
# Check if the cumulative gain when using the model's prediction is
# higher than it would be under random targeting
- assert cumgain['cate_p'].sum() > cumgain['Random'].sum()
+ assert cumgain["cate_p"].sum() > cumgain["Random"].sum()
def test_LRSRegressor(generate_regression_data):
@@ -107,26 +146,33 @@ def test_BaseTLearner(generate_regression_data):
assert ape(tau.mean(), ate_p) < ERROR_THRESHOLD
# check the accuracy of the CATE estimation with the bootstrap CI
- cate_p, _, _ = learner.fit_predict(X=X, treatment=treatment, y=y, return_ci=True, n_bootstraps=10)
-
- auuc_metrics = pd.DataFrame({'cate_p': cate_p.flatten(),
- 'W': treatment,
- 'y': y,
- 'treatment_effect_col': tau})
-
- cumgain = get_cumgain(auuc_metrics,
- outcome_col='y',
- treatment_col='W',
- treatment_effect_col='tau')
+ cate_p, _, _ = learner.fit_predict(
+ X=X, treatment=treatment, y=y, return_ci=True, n_bootstraps=10
+ )
+
+ auuc_metrics = pd.DataFrame(
+ {
+ "cate_p": cate_p.flatten(),
+ "W": treatment,
+ "y": y,
+ "treatment_effect_col": tau,
+ }
+ )
+
+ cumgain = get_cumgain(
+ auuc_metrics, outcome_col="y", treatment_col="W", treatment_effect_col="tau"
+ )
# Check if the cumulative gain when using the model's prediction is
# higher than it would be under random targeting
- assert cumgain['cate_p'].sum() > cumgain['Random'].sum()
+ assert cumgain["cate_p"].sum() > cumgain["Random"].sum()
# test of using control_learner and treatment_learner
- learner = BaseTLearner(learner=XGBRegressor(),
- control_learner=RandomForestRegressor(),
- treatment_learner=RandomForestRegressor())
+ learner = BaseTLearner(
+ learner=XGBRegressor(),
+ control_learner=RandomForestRegressor(),
+ treatment_learner=RandomForestRegressor(),
+ )
# check the accuracy of the ATE estimation
ate_p, lb, ub = learner.estimate_ate(X=X, treatment=treatment, y=y)
assert (ate_p >= lb) and (ate_p <= ub)
@@ -144,21 +190,26 @@ def test_BaseTRegressor(generate_regression_data):
assert ape(tau.mean(), ate_p) < ERROR_THRESHOLD
# check the accuracy of the CATE estimation with the bootstrap CI
- cate_p, _, _ = learner.fit_predict(X=X, treatment=treatment, y=y, return_ci=True, n_bootstraps=10)
-
- auuc_metrics = pd.DataFrame({'cate_p': cate_p.flatten(),
- 'W': treatment,
- 'y': y,
- 'treatment_effect_col': tau})
-
- cumgain = get_cumgain(auuc_metrics,
- outcome_col='y',
- treatment_col='W',
- treatment_effect_col='tau')
+ cate_p, _, _ = learner.fit_predict(
+ X=X, treatment=treatment, y=y, return_ci=True, n_bootstraps=10
+ )
+
+ auuc_metrics = pd.DataFrame(
+ {
+ "cate_p": cate_p.flatten(),
+ "W": treatment,
+ "y": y,
+ "treatment_effect_col": tau,
+ }
+ )
+
+ cumgain = get_cumgain(
+ auuc_metrics, outcome_col="y", treatment_col="W", treatment_effect_col="tau"
+ )
# Check if the cumulative gain when using the model's prediction is
# higher than it would be under random targeting
- assert cumgain['cate_p'].sum() > cumgain['Random'].sum()
+ assert cumgain["cate_p"].sum() > cumgain["Random"].sum()
def test_MLPTRegressor(generate_regression_data):
@@ -172,21 +223,26 @@ def test_MLPTRegressor(generate_regression_data):
assert ape(tau.mean(), ate_p) < ERROR_THRESHOLD
# check the accuracy of the CATE estimation with the bootstrap CI
- cate_p, _, _ = learner.fit_predict(X=X, treatment=treatment, y=y, return_ci=True, n_bootstraps=10)
-
- auuc_metrics = pd.DataFrame({'cate_p': cate_p.flatten(),
- 'W': treatment,
- 'y': y,
- 'treatment_effect_col': tau})
-
- cumgain = get_cumgain(auuc_metrics,
- outcome_col='y',
- treatment_col='W',
- treatment_effect_col='tau')
+ cate_p, _, _ = learner.fit_predict(
+ X=X, treatment=treatment, y=y, return_ci=True, n_bootstraps=10
+ )
+
+ auuc_metrics = pd.DataFrame(
+ {
+ "cate_p": cate_p.flatten(),
+ "W": treatment,
+ "y": y,
+ "treatment_effect_col": tau,
+ }
+ )
+
+ cumgain = get_cumgain(
+ auuc_metrics, outcome_col="y", treatment_col="W", treatment_effect_col="tau"
+ )
# Check if the cumulative gain when using the model's prediction is
# higher than it would be under random targeting
- assert cumgain['cate_p'].sum() > cumgain['Random'].sum()
+ assert cumgain["cate_p"].sum() > cumgain["Random"].sum()
def test_XGBTRegressor(generate_regression_data):
@@ -200,21 +256,26 @@ def test_XGBTRegressor(generate_regression_data):
assert ape(tau.mean(), ate_p) < ERROR_THRESHOLD
# check the accuracy of the CATE estimation with the bootstrap CI
- cate_p, _, _ = learner.fit_predict(X=X, treatment=treatment, y=y, return_ci=True, n_bootstraps=10)
-
- auuc_metrics = pd.DataFrame({'cate_p': cate_p.flatten(),
- 'W': treatment,
- 'y': y,
- 'treatment_effect_col': tau})
-
- cumgain = get_cumgain(auuc_metrics,
- outcome_col='y',
- treatment_col='W',
- treatment_effect_col='tau')
+ cate_p, _, _ = learner.fit_predict(
+ X=X, treatment=treatment, y=y, return_ci=True, n_bootstraps=10
+ )
+
+ auuc_metrics = pd.DataFrame(
+ {
+ "cate_p": cate_p.flatten(),
+ "W": treatment,
+ "y": y,
+ "treatment_effect_col": tau,
+ }
+ )
+
+ cumgain = get_cumgain(
+ auuc_metrics, outcome_col="y", treatment_col="W", treatment_effect_col="tau"
+ )
# Check if the cumulative gain when using the model's prediction is
# higher than it would be under random targeting
- assert cumgain['cate_p'].sum() > cumgain['Random'].sum()
+ assert cumgain["cate_p"].sum() > cumgain["Random"].sum()
def test_BaseXLearner(generate_regression_data):
@@ -228,28 +289,35 @@ def test_BaseXLearner(generate_regression_data):
assert ape(tau.mean(), ate_p) < ERROR_THRESHOLD
# check the accuracy of the CATE estimation with the bootstrap CI
- cate_p, _, _ = learner.fit_predict(X=X, treatment=treatment, y=y, p=e, return_ci=True, n_bootstraps=10)
-
- auuc_metrics = pd.DataFrame({'cate_p': cate_p.flatten(),
- 'W': treatment,
- 'y': y,
- 'treatment_effect_col': tau})
-
- cumgain = get_cumgain(auuc_metrics,
- outcome_col='y',
- treatment_col='W',
- treatment_effect_col='tau')
+ cate_p, _, _ = learner.fit_predict(
+ X=X, treatment=treatment, y=y, p=e, return_ci=True, n_bootstraps=10
+ )
+
+ auuc_metrics = pd.DataFrame(
+ {
+ "cate_p": cate_p.flatten(),
+ "W": treatment,
+ "y": y,
+ "treatment_effect_col": tau,
+ }
+ )
+
+ cumgain = get_cumgain(
+ auuc_metrics, outcome_col="y", treatment_col="W", treatment_effect_col="tau"
+ )
# Check if the cumulative gain when using the model's prediction is
# higher than it would be under random targeting
- assert cumgain['cate_p'].sum() > cumgain['Random'].sum()
+ assert cumgain["cate_p"].sum() > cumgain["Random"].sum()
# basic test of using outcome_learner and effect_learner
- learner = BaseXLearner(learner=XGBRegressor(),
- control_outcome_learner=RandomForestRegressor(),
- treatment_outcome_learner=RandomForestRegressor(),
- control_effect_learner=RandomForestRegressor(),
- treatment_effect_learner=RandomForestRegressor())
+ learner = BaseXLearner(
+ learner=XGBRegressor(),
+ control_outcome_learner=RandomForestRegressor(),
+ treatment_outcome_learner=RandomForestRegressor(),
+ control_effect_learner=RandomForestRegressor(),
+ treatment_effect_learner=RandomForestRegressor(),
+ )
# check the accuracy of the ATE estimation
ate_p, lb, ub = learner.estimate_ate(X=X, treatment=treatment, y=y, p=e)
assert (ate_p >= lb) and (ate_p <= ub)
@@ -267,21 +335,26 @@ def test_BaseXRegressor(generate_regression_data):
assert ape(tau.mean(), ate_p) < ERROR_THRESHOLD
# check the accuracy of the CATE estimation with the bootstrap CI
- cate_p, _, _ = learner.fit_predict(X=X, treatment=treatment, y=y, p=e, return_ci=True, n_bootstraps=10)
-
- auuc_metrics = pd.DataFrame({'cate_p': cate_p.flatten(),
- 'W': treatment,
- 'y': y,
- 'treatment_effect_col': tau})
-
- cumgain = get_cumgain(auuc_metrics,
- outcome_col='y',
- treatment_col='W',
- treatment_effect_col='tau')
+ cate_p, _, _ = learner.fit_predict(
+ X=X, treatment=treatment, y=y, p=e, return_ci=True, n_bootstraps=10
+ )
+
+ auuc_metrics = pd.DataFrame(
+ {
+ "cate_p": cate_p.flatten(),
+ "W": treatment,
+ "y": y,
+ "treatment_effect_col": tau,
+ }
+ )
+
+ cumgain = get_cumgain(
+ auuc_metrics, outcome_col="y", treatment_col="W", treatment_effect_col="tau"
+ )
# Check if the cumulative gain when using the model's prediction is
# higher than it would be under random targeting
- assert cumgain['cate_p'].sum() > cumgain['Random'].sum()
+ assert cumgain["cate_p"].sum() > cumgain["Random"].sum()
def test_BaseXLearner_without_p(generate_regression_data):
@@ -295,21 +368,26 @@ def test_BaseXLearner_without_p(generate_regression_data):
assert ape(tau.mean(), ate_p) < ERROR_THRESHOLD
# check the accuracy of the CATE estimation with the bootstrap CI
- cate_p, _, _ = learner.fit_predict(X=X, treatment=treatment, y=y, return_ci=True, n_bootstraps=10)
-
- auuc_metrics = pd.DataFrame({'cate_p': cate_p.flatten(),
- 'W': treatment,
- 'y': y,
- 'treatment_effect_col': tau})
-
- cumgain = get_cumgain(auuc_metrics,
- outcome_col='y',
- treatment_col='W',
- treatment_effect_col='tau')
+ cate_p, _, _ = learner.fit_predict(
+ X=X, treatment=treatment, y=y, return_ci=True, n_bootstraps=10
+ )
+
+ auuc_metrics = pd.DataFrame(
+ {
+ "cate_p": cate_p.flatten(),
+ "W": treatment,
+ "y": y,
+ "treatment_effect_col": tau,
+ }
+ )
+
+ cumgain = get_cumgain(
+ auuc_metrics, outcome_col="y", treatment_col="W", treatment_effect_col="tau"
+ )
# Check if the cumulative gain when using the model's prediction is
# higher than it would be under random targeting
- assert cumgain['cate_p'].sum() > cumgain['Random'].sum()
+ assert cumgain["cate_p"].sum() > cumgain["Random"].sum()
def test_BaseXRegressor_without_p(generate_regression_data):
@@ -323,21 +401,26 @@ def test_BaseXRegressor_without_p(generate_regression_data):
assert ape(tau.mean(), ate_p) < ERROR_THRESHOLD
# check the accuracy of the CATE estimation with the bootstrap CI
- cate_p, _, _ = learner.fit_predict(X=X, treatment=treatment, y=y, return_ci=True, n_bootstraps=10)
-
- auuc_metrics = pd.DataFrame({'cate_p': cate_p.flatten(),
- 'W': treatment,
- 'y': y,
- 'treatment_effect_col': tau})
-
- cumgain = get_cumgain(auuc_metrics,
- outcome_col='y',
- treatment_col='W',
- treatment_effect_col='tau')
+ cate_p, _, _ = learner.fit_predict(
+ X=X, treatment=treatment, y=y, return_ci=True, n_bootstraps=10
+ )
+
+ auuc_metrics = pd.DataFrame(
+ {
+ "cate_p": cate_p.flatten(),
+ "W": treatment,
+ "y": y,
+ "treatment_effect_col": tau,
+ }
+ )
+
+ cumgain = get_cumgain(
+ auuc_metrics, outcome_col="y", treatment_col="W", treatment_effect_col="tau"
+ )
# Check if the cumulative gain when using the model's prediction is
# higher than it would be under random targeting
- assert cumgain['cate_p'].sum() > cumgain['Random'].sum()
+ assert cumgain["cate_p"].sum() > cumgain["Random"].sum()
def test_BaseRLearner(generate_regression_data):
@@ -351,29 +434,38 @@ def test_BaseRLearner(generate_regression_data):
assert ape(tau.mean(), ate_p) < ERROR_THRESHOLD
# check the accuracy of the CATE estimation with the bootstrap CI
- cate_p, _, _ = learner.fit_predict(X=X, treatment=treatment, y=y, p=e, return_ci=True, n_bootstraps=10)
-
- auuc_metrics = pd.DataFrame({'cate_p': cate_p.flatten(),
- 'W': treatment,
- 'y': y,
- 'treatment_effect_col': tau})
-
- cumgain = get_cumgain(auuc_metrics,
- outcome_col='y',
- treatment_col='W',
- treatment_effect_col='tau')
+ cate_p, _, _ = learner.fit_predict(
+ X=X, treatment=treatment, y=y, p=e, return_ci=True, n_bootstraps=10
+ )
+
+ auuc_metrics = pd.DataFrame(
+ {
+ "cate_p": cate_p.flatten(),
+ "W": treatment,
+ "y": y,
+ "treatment_effect_col": tau,
+ }
+ )
+
+ cumgain = get_cumgain(
+ auuc_metrics, outcome_col="y", treatment_col="W", treatment_effect_col="tau"
+ )
# Check if the cumulative gain when using the model's prediction is
# higher than it would be under random targeting
- assert cumgain['cate_p'].sum() > cumgain['Random'].sum()
+ assert cumgain["cate_p"].sum() > cumgain["Random"].sum()
# basic test of using outcome_learner and effect_learner
- learner = BaseRLearner(learner=XGBRegressor(),
- outcome_learner=RandomForestRegressor(),
- effect_learner=RandomForestRegressor())
+ learner = BaseRLearner(
+ learner=XGBRegressor(),
+ outcome_learner=RandomForestRegressor(),
+ effect_learner=RandomForestRegressor(),
+ )
ate_p, lb, ub = learner.estimate_ate(X=X, treatment=treatment, y=y, p=e)
assert (ate_p >= lb) and (ate_p <= ub)
- assert ape(tau.mean(), ate_p) < ERROR_THRESHOLD * 5 # might need to look into higher ape
+ assert (
+ ape(tau.mean(), ate_p) < ERROR_THRESHOLD * 5
+ ) # might need to look into higher ape
def test_BaseRRegressor(generate_regression_data):
@@ -387,21 +479,26 @@ def test_BaseRRegressor(generate_regression_data):
assert ape(tau.mean(), ate_p) < ERROR_THRESHOLD
# check the accuracy of the CATE estimation with the bootstrap CI
- cate_p, _, _ = learner.fit_predict(X=X, treatment=treatment, y=y, p=e, return_ci=True, n_bootstraps=10)
-
- auuc_metrics = pd.DataFrame({'cate_p': cate_p.flatten(),
- 'W': treatment,
- 'y': y,
- 'treatment_effect_col': tau})
-
- cumgain = get_cumgain(auuc_metrics,
- outcome_col='y',
- treatment_col='W',
- treatment_effect_col='tau')
+ cate_p, _, _ = learner.fit_predict(
+ X=X, treatment=treatment, y=y, p=e, return_ci=True, n_bootstraps=10
+ )
+
+ auuc_metrics = pd.DataFrame(
+ {
+ "cate_p": cate_p.flatten(),
+ "W": treatment,
+ "y": y,
+ "treatment_effect_col": tau,
+ }
+ )
+
+ cumgain = get_cumgain(
+ auuc_metrics, outcome_col="y", treatment_col="W", treatment_effect_col="tau"
+ )
# Check if the cumulative gain when using the model's prediction is
# higher than it would be under random targeting
- assert cumgain['cate_p'].sum() > cumgain['Random'].sum()
+ assert cumgain["cate_p"].sum() > cumgain["Random"].sum()
def test_BaseRLearner_without_p(generate_regression_data):
@@ -415,21 +512,26 @@ def test_BaseRLearner_without_p(generate_regression_data):
assert ape(tau.mean(), ate_p) < ERROR_THRESHOLD
# check the accuracy of the CATE estimation with the bootstrap CI
- cate_p, _, _ = learner.fit_predict(X=X, treatment=treatment, y=y, return_ci=True, n_bootstraps=10)
-
- auuc_metrics = pd.DataFrame({'cate_p': cate_p.flatten(),
- 'W': treatment,
- 'y': y,
- 'treatment_effect_col': tau})
-
- cumgain = get_cumgain(auuc_metrics,
- outcome_col='y',
- treatment_col='W',
- treatment_effect_col='tau')
+ cate_p, _, _ = learner.fit_predict(
+ X=X, treatment=treatment, y=y, return_ci=True, n_bootstraps=10
+ )
+
+ auuc_metrics = pd.DataFrame(
+ {
+ "cate_p": cate_p.flatten(),
+ "W": treatment,
+ "y": y,
+ "treatment_effect_col": tau,
+ }
+ )
+
+ cumgain = get_cumgain(
+ auuc_metrics, outcome_col="y", treatment_col="W", treatment_effect_col="tau"
+ )
# Check if the cumulative gain when using the model's prediction is
# higher than it would be under random targeting
- assert cumgain['cate_p'].sum() > cumgain['Random'].sum()
+ assert cumgain["cate_p"].sum() > cumgain["Random"].sum()
def test_BaseRRegressor_without_p(generate_regression_data):
@@ -443,21 +545,26 @@ def test_BaseRRegressor_without_p(generate_regression_data):
assert ape(tau.mean(), ate_p) < ERROR_THRESHOLD
# check the accuracy of the CATE estimation with the bootstrap CI
- cate_p, _, _ = learner.fit_predict(X=X, treatment=treatment, y=y, return_ci=True, n_bootstraps=10)
-
- auuc_metrics = pd.DataFrame({'cate_p': cate_p.flatten(),
- 'W': treatment,
- 'y': y,
- 'treatment_effect_col': tau})
-
- cumgain = get_cumgain(auuc_metrics,
- outcome_col='y',
- treatment_col='W',
- treatment_effect_col='tau')
+ cate_p, _, _ = learner.fit_predict(
+ X=X, treatment=treatment, y=y, return_ci=True, n_bootstraps=10
+ )
+
+ auuc_metrics = pd.DataFrame(
+ {
+ "cate_p": cate_p.flatten(),
+ "W": treatment,
+ "y": y,
+ "treatment_effect_col": tau,
+ }
+ )
+
+ cumgain = get_cumgain(
+ auuc_metrics, outcome_col="y", treatment_col="W", treatment_effect_col="tau"
+ )
# Check if the cumulative gain when using the model's prediction is
# higher than it would be under random targeting
- assert cumgain['cate_p'].sum() > cumgain['Random'].sum()
+ assert cumgain["cate_p"].sum() > cumgain["Random"].sum()
def test_TMLELearner(generate_regression_data):
@@ -477,34 +584,43 @@ def test_BaseSClassifier(generate_classification_data):
df, x_names = generate_classification_data()
- df['treatment_group_key'] = np.where(df['treatment_group_key'] == CONTROL_NAME, 0, 1)
+ df["treatment_group_key"] = np.where(
+ df["treatment_group_key"] == CONTROL_NAME, 0, 1
+ )
- df_train, df_test = train_test_split(df,
- test_size=0.2,
- random_state=RANDOM_SEED)
+ df_train, df_test = train_test_split(df, test_size=0.2, random_state=RANDOM_SEED)
uplift_model = BaseSClassifier(learner=XGBClassifier())
- uplift_model.fit(X=df_train[x_names].values,
- treatment=df_train['treatment_group_key'].values,
- y=df_train[CONVERSION].values)
-
- tau_pred = uplift_model.predict(X=df_test[x_names].values,
- treatment=df_test['treatment_group_key'].values)
-
- auuc_metrics = pd.DataFrame({'tau_pred': tau_pred.flatten(),
- 'W': df_test['treatment_group_key'].values,
- CONVERSION: df_test[CONVERSION].values,
- 'treatment_effect_col': df_test['treatment_effect'].values})
-
- cumgain = get_cumgain(auuc_metrics,
- outcome_col=CONVERSION,
- treatment_col='W',
- treatment_effect_col='treatment_effect_col')
+ uplift_model.fit(
+ X=df_train[x_names].values,
+ treatment=df_train["treatment_group_key"].values,
+ y=df_train[CONVERSION].values,
+ )
+
+ tau_pred = uplift_model.predict(
+ X=df_test[x_names].values, treatment=df_test["treatment_group_key"].values
+ )
+
+ auuc_metrics = pd.DataFrame(
+ {
+ "tau_pred": tau_pred.flatten(),
+ "W": df_test["treatment_group_key"].values,
+ CONVERSION: df_test[CONVERSION].values,
+ "treatment_effect_col": df_test["treatment_effect"].values,
+ }
+ )
+
+ cumgain = get_cumgain(
+ auuc_metrics,
+ outcome_col=CONVERSION,
+ treatment_col="W",
+ treatment_effect_col="treatment_effect_col",
+ )
# Check if the cumulative gain when using the model's prediction is
# higher than it would be under random targeting
- assert cumgain['tau_pred'].sum() > cumgain['Random'].sum()
+ assert cumgain["tau_pred"].sum() > cumgain["Random"].sum()
def test_BaseTClassifier(generate_classification_data):
@@ -513,34 +629,43 @@ def test_BaseTClassifier(generate_classification_data):
df, x_names = generate_classification_data()
- df['treatment_group_key'] = np.where(df['treatment_group_key'] == CONTROL_NAME, 0, 1)
+ df["treatment_group_key"] = np.where(
+ df["treatment_group_key"] == CONTROL_NAME, 0, 1
+ )
- df_train, df_test = train_test_split(df,
- test_size=0.2,
- random_state=RANDOM_SEED)
+ df_train, df_test = train_test_split(df, test_size=0.2, random_state=RANDOM_SEED)
uplift_model = BaseTClassifier(learner=LogisticRegression())
- uplift_model.fit(X=df_train[x_names].values,
- treatment=df_train['treatment_group_key'].values,
- y=df_train[CONVERSION].values)
-
- tau_pred = uplift_model.predict(X=df_test[x_names].values,
- treatment=df_test['treatment_group_key'].values)
-
- auuc_metrics = pd.DataFrame({'tau_pred': tau_pred.flatten(),
- 'W': df_test['treatment_group_key'].values,
- CONVERSION: df_test[CONVERSION].values,
- 'treatment_effect_col': df_test['treatment_effect'].values})
-
- cumgain = get_cumgain(auuc_metrics,
- outcome_col=CONVERSION,
- treatment_col='W',
- treatment_effect_col='treatment_effect_col')
+ uplift_model.fit(
+ X=df_train[x_names].values,
+ treatment=df_train["treatment_group_key"].values,
+ y=df_train[CONVERSION].values,
+ )
+
+ tau_pred = uplift_model.predict(
+ X=df_test[x_names].values, treatment=df_test["treatment_group_key"].values
+ )
+
+ auuc_metrics = pd.DataFrame(
+ {
+ "tau_pred": tau_pred.flatten(),
+ "W": df_test["treatment_group_key"].values,
+ CONVERSION: df_test[CONVERSION].values,
+ "treatment_effect_col": df_test["treatment_effect"].values,
+ }
+ )
+
+ cumgain = get_cumgain(
+ auuc_metrics,
+ outcome_col=CONVERSION,
+ treatment_col="W",
+ treatment_effect_col="treatment_effect_col",
+ )
# Check if the cumulative gain when using the model's prediction is
# higher than it would be under random targeting
- assert cumgain['tau_pred'].sum() > cumgain['Random'].sum()
+ assert cumgain["tau_pred"].sum() > cumgain["Random"].sum()
def test_BaseXClassifier(generate_classification_data):
@@ -549,54 +674,69 @@ def test_BaseXClassifier(generate_classification_data):
df, x_names = generate_classification_data()
- df['treatment_group_key'] = np.where(df['treatment_group_key'] == CONTROL_NAME, 0, 1)
+ df["treatment_group_key"] = np.where(
+ df["treatment_group_key"] == CONTROL_NAME, 0, 1
+ )
propensity_model = LogisticRegression()
- propensity_model.fit(X=df[x_names].values, y=df['treatment_group_key'].values)
- df['propensity_score'] = propensity_model.predict_proba(df[x_names].values)[:, 1]
+ propensity_model.fit(X=df[x_names].values, y=df["treatment_group_key"].values)
+ df["propensity_score"] = propensity_model.predict_proba(df[x_names].values)[:, 1]
- df_train, df_test = train_test_split(df,
- test_size=0.2,
- random_state=RANDOM_SEED)
+ df_train, df_test = train_test_split(df, test_size=0.2, random_state=RANDOM_SEED)
# specify all 4 learners
- uplift_model = BaseXClassifier(control_outcome_learner=XGBClassifier(),
- control_effect_learner=XGBRegressor(),
- treatment_outcome_learner=XGBClassifier(),
- treatment_effect_learner=XGBRegressor())
-
- uplift_model.fit(X=df_train[x_names].values,
- treatment=df_train['treatment_group_key'].values,
- y=df_train[CONVERSION].values)
-
- tau_pred = uplift_model.predict(X=df_test[x_names].values,
- p=df_test['propensity_score'].values)
+ uplift_model = BaseXClassifier(
+ control_outcome_learner=XGBClassifier(),
+ control_effect_learner=XGBRegressor(),
+ treatment_outcome_learner=XGBClassifier(),
+ treatment_effect_learner=XGBRegressor(),
+ )
+
+ uplift_model.fit(
+ X=df_train[x_names].values,
+ treatment=df_train["treatment_group_key"].values,
+ y=df_train[CONVERSION].values,
+ )
+
+ tau_pred = uplift_model.predict(
+ X=df_test[x_names].values, p=df_test["propensity_score"].values
+ )
# specify 2 learners
- uplift_model = BaseXClassifier(outcome_learner=XGBClassifier(),
- effect_learner=XGBRegressor())
+ uplift_model = BaseXClassifier(
+ outcome_learner=XGBClassifier(), effect_learner=XGBRegressor()
+ )
- uplift_model.fit(X=df_train[x_names].values,
- treatment=df_train['treatment_group_key'].values,
- y=df_train[CONVERSION].values)
+ uplift_model.fit(
+ X=df_train[x_names].values,
+ treatment=df_train["treatment_group_key"].values,
+ y=df_train[CONVERSION].values,
+ )
- tau_pred = uplift_model.predict(X=df_test[x_names].values,
- p=df_test['propensity_score'].values)
+ tau_pred = uplift_model.predict(
+ X=df_test[x_names].values, p=df_test["propensity_score"].values
+ )
# calculate metrics
- auuc_metrics = pd.DataFrame({'tau_pred': tau_pred.flatten(),
- 'W': df_test['treatment_group_key'].values,
- CONVERSION: df_test[CONVERSION].values,
- 'treatment_effect_col': df_test['treatment_effect'].values})
-
- cumgain = get_cumgain(auuc_metrics,
- outcome_col=CONVERSION,
- treatment_col='W',
- treatment_effect_col='treatment_effect_col')
+ auuc_metrics = pd.DataFrame(
+ {
+ "tau_pred": tau_pred.flatten(),
+ "W": df_test["treatment_group_key"].values,
+ CONVERSION: df_test[CONVERSION].values,
+ "treatment_effect_col": df_test["treatment_effect"].values,
+ }
+ )
+
+ cumgain = get_cumgain(
+ auuc_metrics,
+ outcome_col=CONVERSION,
+ treatment_col="W",
+ treatment_effect_col="treatment_effect_col",
+ )
# Check if the cumulative gain when using the model's prediction is
# higher than it would be under random targeting
- assert cumgain['tau_pred'].sum() > cumgain['Random'].sum()
+ assert cumgain["tau_pred"].sum() > cumgain["Random"].sum()
def test_BaseRClassifier(generate_classification_data):
@@ -605,39 +745,48 @@ def test_BaseRClassifier(generate_classification_data):
df, x_names = generate_classification_data()
- df['treatment_group_key'] = np.where(df['treatment_group_key'] == CONTROL_NAME, 0, 1)
+ df["treatment_group_key"] = np.where(
+ df["treatment_group_key"] == CONTROL_NAME, 0, 1
+ )
propensity_model = LogisticRegression()
- propensity_model.fit(X=df[x_names].values, y=df['treatment_group_key'].values)
- df['propensity_score'] = propensity_model.predict_proba(df[x_names].values)[:, 1]
+ propensity_model.fit(X=df[x_names].values, y=df["treatment_group_key"].values)
+ df["propensity_score"] = propensity_model.predict_proba(df[x_names].values)[:, 1]
- df_train, df_test = train_test_split(df,
- test_size=0.2,
- random_state=RANDOM_SEED)
+ df_train, df_test = train_test_split(df, test_size=0.2, random_state=RANDOM_SEED)
- uplift_model = BaseRClassifier(outcome_learner=XGBClassifier(),
- effect_learner=XGBRegressor())
+ uplift_model = BaseRClassifier(
+ outcome_learner=XGBClassifier(), effect_learner=XGBRegressor()
+ )
- uplift_model.fit(X=df_train[x_names].values,
- p=df_train['propensity_score'].values,
- treatment=df_train['treatment_group_key'].values,
- y=df_train[CONVERSION].values)
+ uplift_model.fit(
+ X=df_train[x_names].values,
+ p=df_train["propensity_score"].values,
+ treatment=df_train["treatment_group_key"].values,
+ y=df_train[CONVERSION].values,
+ )
tau_pred = uplift_model.predict(X=df_test[x_names].values)
- auuc_metrics = pd.DataFrame({'tau_pred': tau_pred.flatten(),
- 'W': df_test['treatment_group_key'].values,
- CONVERSION: df_test[CONVERSION].values,
- 'treatment_effect_col': df_test['treatment_effect'].values})
-
- cumgain = get_cumgain(auuc_metrics,
- outcome_col=CONVERSION,
- treatment_col='W',
- treatment_effect_col='treatment_effect_col')
+ auuc_metrics = pd.DataFrame(
+ {
+ "tau_pred": tau_pred.flatten(),
+ "W": df_test["treatment_group_key"].values,
+ CONVERSION: df_test[CONVERSION].values,
+ "treatment_effect_col": df_test["treatment_effect"].values,
+ }
+ )
+
+ cumgain = get_cumgain(
+ auuc_metrics,
+ outcome_col=CONVERSION,
+ treatment_col="W",
+ treatment_effect_col="treatment_effect_col",
+ )
# Check if the cumulative gain when using the model's prediction is
# higher than it would be under random targeting
- assert cumgain['tau_pred'].sum() > cumgain['Random'].sum()
+ assert cumgain["tau_pred"].sum() > cumgain["Random"].sum()
def test_BaseRClassifier_with_sample_weights(generate_classification_data):
@@ -646,52 +795,63 @@ def test_BaseRClassifier_with_sample_weights(generate_classification_data):
df, x_names = generate_classification_data()
- df['treatment_group_key'] = np.where(df['treatment_group_key'] == CONTROL_NAME, 0, 1)
- df['sample_weights'] = np.random.randint(low=1, high=3, size=df.shape[0])
+ df["treatment_group_key"] = np.where(
+ df["treatment_group_key"] == CONTROL_NAME, 0, 1
+ )
+ df["sample_weights"] = np.random.randint(low=1, high=3, size=df.shape[0])
propensity_model = LogisticRegression()
- propensity_model.fit(X=df[x_names].values, y=df['treatment_group_key'].values)
- df['propensity_score'] = propensity_model.predict_proba(df[x_names].values)[:, 1]
+ propensity_model.fit(X=df[x_names].values, y=df["treatment_group_key"].values)
+ df["propensity_score"] = propensity_model.predict_proba(df[x_names].values)[:, 1]
- df_train, df_test = train_test_split(df,
- test_size=0.2,
- random_state=RANDOM_SEED)
+ df_train, df_test = train_test_split(df, test_size=0.2, random_state=RANDOM_SEED)
- uplift_model = BaseRClassifier(outcome_learner=XGBClassifier(),
- effect_learner=XGBRegressor())
+ uplift_model = BaseRClassifier(
+ outcome_learner=XGBClassifier(), effect_learner=XGBRegressor()
+ )
- uplift_model.fit(X=df_train[x_names].values,
- p=df_train['propensity_score'].values,
- treatment=df_train['treatment_group_key'].values,
- y=df_train[CONVERSION].values,
- sample_weight=df_train['sample_weights'])
+ uplift_model.fit(
+ X=df_train[x_names].values,
+ p=df_train["propensity_score"].values,
+ treatment=df_train["treatment_group_key"].values,
+ y=df_train[CONVERSION].values,
+ sample_weight=df_train["sample_weights"],
+ )
tau_pred = uplift_model.predict(X=df_test[x_names].values)
- auuc_metrics = pd.DataFrame({'tau_pred': tau_pred.flatten(),
- 'W': df_test['treatment_group_key'].values,
- CONVERSION: df_test[CONVERSION].values,
- 'treatment_effect_col': df_test['treatment_effect'].values})
-
- cumgain = get_cumgain(auuc_metrics,
- outcome_col=CONVERSION,
- treatment_col='W',
- treatment_effect_col='treatment_effect_col')
+ auuc_metrics = pd.DataFrame(
+ {
+ "tau_pred": tau_pred.flatten(),
+ "W": df_test["treatment_group_key"].values,
+ CONVERSION: df_test[CONVERSION].values,
+ "treatment_effect_col": df_test["treatment_effect"].values,
+ }
+ )
+
+ cumgain = get_cumgain(
+ auuc_metrics,
+ outcome_col=CONVERSION,
+ treatment_col="W",
+ treatment_effect_col="treatment_effect_col",
+ )
# Check if the cumulative gain when using the model's prediction is
# higher than it would be under random targeting
- assert cumgain['tau_pred'].sum() > cumgain['Random'].sum()
+ assert cumgain["tau_pred"].sum() > cumgain["Random"].sum()
# Check if XGBRRegressor successfully produces treatment effect estimation
# when sample_weight is passed
uplift_model = XGBRRegressor()
- uplift_model.fit(X=df_train[x_names].values,
- p=df_train['propensity_score'].values,
- treatment=df_train['treatment_group_key'].values,
- y=df_train[CONVERSION].values,
- sample_weight=df_train['sample_weights'])
+ uplift_model.fit(
+ X=df_train[x_names].values,
+ p=df_train["propensity_score"].values,
+ treatment=df_train["treatment_group_key"].values,
+ y=df_train[CONVERSION].values,
+ sample_weight=df_train["sample_weights"],
+ )
tau_pred = uplift_model.predict(X=df_test[x_names].values)
- assert len(tau_pred) == len(df_test['sample_weights'].values)
+ assert len(tau_pred) == len(df_test["sample_weights"].values)
def test_pandas_input(generate_regression_data):
@@ -703,7 +863,9 @@ def test_pandas_input(generate_regression_data):
try:
learner = BaseSLearner(learner=LinearRegression())
- ate_p, lb, ub = learner.estimate_ate(X=X, treatment=treatment, y=y, return_ci=True)
+ ate_p, lb, ub = learner.estimate_ate(
+ X=X, treatment=treatment, y=y, return_ci=True
+ )
except AttributeError:
assert False
try:
@@ -727,10 +889,13 @@ def test_pandas_input(generate_regression_data):
except AttributeError:
assert False
+
def test_BaseDRLearner(generate_regression_data):
y, X, treatment, tau, b, e = generate_regression_data()
- learner = BaseDRLearner(learner=XGBRegressor(), treatment_effect_learner=LinearRegression())
+ learner = BaseDRLearner(
+ learner=XGBRegressor(), treatment_effect_learner=LinearRegression()
+ )
# check the accuracy of the ATE estimation
ate_p, lb, ub = learner.estimate_ate(X=X, treatment=treatment, y=y, p=e)
@@ -738,18 +903,23 @@ def test_BaseDRLearner(generate_regression_data):
assert ape(tau.mean(), ate_p) < ERROR_THRESHOLD
# check the accuracy of the CATE estimation with the bootstrap CI
- cate_p, _, _ = learner.fit_predict(X=X, treatment=treatment, y=y, p=e, return_ci=True, n_bootstraps=10)
-
- auuc_metrics = pd.DataFrame({'cate_p': cate_p.flatten(),
- 'W': treatment,
- 'y': y,
- 'treatment_effect_col': tau})
-
- cumgain = get_cumgain(auuc_metrics,
- outcome_col='y',
- treatment_col='W',
- treatment_effect_col='tau')
+ cate_p, _, _ = learner.fit_predict(
+ X=X, treatment=treatment, y=y, p=e, return_ci=True, n_bootstraps=10
+ )
+
+ auuc_metrics = pd.DataFrame(
+ {
+ "cate_p": cate_p.flatten(),
+ "W": treatment,
+ "y": y,
+ "treatment_effect_col": tau,
+ }
+ )
+
+ cumgain = get_cumgain(
+ auuc_metrics, outcome_col="y", treatment_col="W", treatment_effect_col="tau"
+ )
# Check if the cumulative gain when using the model's prediction is
# higher than it would be under random targeting
- assert cumgain['cate_p'].sum() > cumgain['Random'].sum()
+ assert cumgain["cate_p"].sum() > cumgain["Random"].sum()
diff --git a/tests/test_propensity.py b/tests/test_propensity.py
index 9c2e9aa1..f399afa5 100644
--- a/tests/test_propensity.py
+++ b/tests/test_propensity.py
@@ -1,7 +1,7 @@
from causalml.propensity import (
ElasticNetPropensityModel,
GradientBoostedPropensityModel,
- LogisticRegressionPropensityModel
+ LogisticRegressionPropensityModel,
)
from causalml.metrics import roc_auc_score
@@ -15,7 +15,7 @@ def test_logistic_regression_propensity_model(generate_regression_data):
pm = LogisticRegressionPropensityModel(random_state=RANDOM_SEED)
ps = pm.fit_predict(X, treatment)
- assert roc_auc_score(treatment, ps) > .5
+ assert roc_auc_score(treatment, ps) > 0.5
def test_logistic_regression_propensity_model_model_kwargs(generate_regression_data):
@@ -32,7 +32,7 @@ def test_elasticnet_propensity_model(generate_regression_data):
pm = ElasticNetPropensityModel(random_state=RANDOM_SEED)
ps = pm.fit_predict(X, treatment)
- assert roc_auc_score(treatment, ps) > .5
+ assert roc_auc_score(treatment, ps) > 0.5
def test_gradientboosted_propensity_model(generate_regression_data):
@@ -41,7 +41,7 @@ def test_gradientboosted_propensity_model(generate_regression_data):
pm = GradientBoostedPropensityModel(random_state=RANDOM_SEED)
ps = pm.fit_predict(X, treatment)
- assert roc_auc_score(treatment, ps) > .5
+ assert roc_auc_score(treatment, ps) > 0.5
def test_gradientboosted_propensity_model_earlystopping(generate_regression_data):
@@ -50,4 +50,4 @@ def test_gradientboosted_propensity_model_earlystopping(generate_regression_data
pm = GradientBoostedPropensityModel(random_state=RANDOM_SEED, early_stop=True)
ps = pm.fit_predict(X, treatment)
- assert roc_auc_score(treatment, ps) > .5
+ assert roc_auc_score(treatment, ps) > 0.5
diff --git a/tests/test_sensitivity.py b/tests/test_sensitivity.py
index 4c25f676..7690bbc7 100644
--- a/tests/test_sensitivity.py
+++ b/tests/test_sensitivity.py
@@ -1,4 +1,3 @@
-
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
@@ -6,18 +5,31 @@
from causalml.dataset import synthetic_data
from causalml.inference.meta import BaseXLearner
from causalml.metrics.sensitivity import Sensitivity
-from causalml.metrics.sensitivity import SensitivityPlaceboTreatment, SensitivityRandomCause
-from causalml.metrics.sensitivity import SensitivityRandomReplace, SensitivitySelectionBias
-from causalml.metrics.sensitivity import one_sided, alignment, one_sided_att, alignment_att
+from causalml.metrics.sensitivity import (
+ SensitivityPlaceboTreatment,
+ SensitivityRandomCause,
+)
+from causalml.metrics.sensitivity import (
+ SensitivityRandomReplace,
+ SensitivitySelectionBias,
+)
+from causalml.metrics.sensitivity import (
+ one_sided,
+ alignment,
+ one_sided_att,
+ alignment_att,
+)
from .const import TREATMENT_COL, SCORE_COL, OUTCOME_COL, NUM_FEATURES
def test_Sensitivity():
- y, X, treatment, tau, b, e = synthetic_data(mode=1, n=100000, p=NUM_FEATURES, sigma=1.0)
+ y, X, treatment, tau, b, e = synthetic_data(
+ mode=1, n=100000, p=NUM_FEATURES, sigma=1.0
+ )
# generate the dataset format for sensitivity analysis
- INFERENCE_FEATURES = ['feature_' + str(i) for i in range(NUM_FEATURES)]
+ INFERENCE_FEATURES = ["feature_" + str(i) for i in range(NUM_FEATURES)]
df = pd.DataFrame(X, columns=INFERENCE_FEATURES)
df[TREATMENT_COL] = treatment
df[OUTCOME_COL] = y
@@ -25,24 +37,37 @@ def test_Sensitivity():
# calling the Base XLearner class and return the sensitivity analysis summary report
learner = BaseXLearner(LinearRegression())
- sens = Sensitivity(df=df, inference_features=INFERENCE_FEATURES, p_col=SCORE_COL,
- treatment_col=TREATMENT_COL, outcome_col=OUTCOME_COL, learner=learner)
+ sens = Sensitivity(
+ df=df,
+ inference_features=INFERENCE_FEATURES,
+ p_col=SCORE_COL,
+ treatment_col=TREATMENT_COL,
+ outcome_col=OUTCOME_COL,
+ learner=learner,
+ )
# check the sensitivity summary report
- sens_summary = sens.sensitivity_analysis(methods=['Placebo Treatment',
- 'Random Cause',
- 'Subset Data',
- 'Random Replace',
- 'Selection Bias'], sample_size=0.5)
+ sens_summary = sens.sensitivity_analysis(
+ methods=[
+ "Placebo Treatment",
+ "Random Cause",
+ "Subset Data",
+ "Random Replace",
+ "Selection Bias",
+ ],
+ sample_size=0.5,
+ )
print(sens_summary)
def test_SensitivityPlaceboTreatment():
- y, X, treatment, tau, b, e = synthetic_data(mode=1, n=100000, p=NUM_FEATURES, sigma=1.0)
+ y, X, treatment, tau, b, e = synthetic_data(
+ mode=1, n=100000, p=NUM_FEATURES, sigma=1.0
+ )
# generate the dataset format for sensitivity analysis
- INFERENCE_FEATURES = ['feature_' + str(i) for i in range(NUM_FEATURES)]
+ INFERENCE_FEATURES = ["feature_" + str(i) for i in range(NUM_FEATURES)]
df = pd.DataFrame(X, columns=INFERENCE_FEATURES)
df[TREATMENT_COL] = treatment
df[OUTCOME_COL] = y
@@ -50,18 +75,26 @@ def test_SensitivityPlaceboTreatment():
# calling the Base XLearner class and return the sensitivity analysis summary report
learner = BaseXLearner(LinearRegression())
- sens = SensitivityPlaceboTreatment(df=df, inference_features=INFERENCE_FEATURES, p_col=SCORE_COL,
- treatment_col=TREATMENT_COL, outcome_col=OUTCOME_COL, learner=learner)
-
- sens_summary = sens.summary(method='Random Cause')
+ sens = SensitivityPlaceboTreatment(
+ df=df,
+ inference_features=INFERENCE_FEATURES,
+ p_col=SCORE_COL,
+ treatment_col=TREATMENT_COL,
+ outcome_col=OUTCOME_COL,
+ learner=learner,
+ )
+
+ sens_summary = sens.summary(method="Random Cause")
print(sens_summary)
def test_SensitivityRandomCause():
- y, X, treatment, tau, b, e = synthetic_data(mode=1, n=100000, p=NUM_FEATURES, sigma=1.0)
+ y, X, treatment, tau, b, e = synthetic_data(
+ mode=1, n=100000, p=NUM_FEATURES, sigma=1.0
+ )
# generate the dataset format for sensitivity analysis
- INFERENCE_FEATURES = ['feature_' + str(i) for i in range(NUM_FEATURES)]
+ INFERENCE_FEATURES = ["feature_" + str(i) for i in range(NUM_FEATURES)]
df = pd.DataFrame(X, columns=INFERENCE_FEATURES)
df[TREATMENT_COL] = treatment
df[OUTCOME_COL] = y
@@ -69,18 +102,26 @@ def test_SensitivityRandomCause():
# calling the Base XLearner class and return the sensitivity analysis summary report
learner = BaseXLearner(LinearRegression())
- sens = SensitivityRandomCause(df=df, inference_features=INFERENCE_FEATURES, p_col=SCORE_COL,
- treatment_col=TREATMENT_COL, outcome_col=OUTCOME_COL, learner=learner)
-
- sens_summary = sens.summary(method='Random Cause')
+ sens = SensitivityRandomCause(
+ df=df,
+ inference_features=INFERENCE_FEATURES,
+ p_col=SCORE_COL,
+ treatment_col=TREATMENT_COL,
+ outcome_col=OUTCOME_COL,
+ learner=learner,
+ )
+
+ sens_summary = sens.summary(method="Random Cause")
print(sens_summary)
def test_SensitivityRandomReplace():
- y, X, treatment, tau, b, e = synthetic_data(mode=1, n=100000, p=NUM_FEATURES, sigma=1.0)
+ y, X, treatment, tau, b, e = synthetic_data(
+ mode=1, n=100000, p=NUM_FEATURES, sigma=1.0
+ )
# generate the dataset format for sensitivity analysis
- INFERENCE_FEATURES = ['feature_' + str(i) for i in range(NUM_FEATURES)]
+ INFERENCE_FEATURES = ["feature_" + str(i) for i in range(NUM_FEATURES)]
df = pd.DataFrame(X, columns=INFERENCE_FEATURES)
df[TREATMENT_COL] = treatment
df[OUTCOME_COL] = y
@@ -88,19 +129,28 @@ def test_SensitivityRandomReplace():
# calling the Base XLearner class and return the sensitivity analysis summary report
learner = BaseXLearner(LinearRegression())
- sens = SensitivityRandomReplace(df=df, inference_features=INFERENCE_FEATURES, p_col=SCORE_COL,
- treatment_col=TREATMENT_COL, outcome_col=OUTCOME_COL, learner=learner,
- sample_size=0.9, replaced_feature='feature_0')
-
- sens_summary = sens.summary(method='Random Replace')
+ sens = SensitivityRandomReplace(
+ df=df,
+ inference_features=INFERENCE_FEATURES,
+ p_col=SCORE_COL,
+ treatment_col=TREATMENT_COL,
+ outcome_col=OUTCOME_COL,
+ learner=learner,
+ sample_size=0.9,
+ replaced_feature="feature_0",
+ )
+
+ sens_summary = sens.summary(method="Random Replace")
print(sens_summary)
def test_SensitivitySelectionBias():
- y, X, treatment, tau, b, e = synthetic_data(mode=1, n=100000, p=NUM_FEATURES, sigma=1.0)
+ y, X, treatment, tau, b, e = synthetic_data(
+ mode=1, n=100000, p=NUM_FEATURES, sigma=1.0
+ )
# generate the dataset format for sensitivity analysis
- INFERENCE_FEATURES = ['feature_' + str(i) for i in range(NUM_FEATURES)]
+ INFERENCE_FEATURES = ["feature_" + str(i) for i in range(NUM_FEATURES)]
df = pd.DataFrame(X, columns=INFERENCE_FEATURES)
df[TREATMENT_COL] = treatment
df[OUTCOME_COL] = y
@@ -108,8 +158,16 @@ def test_SensitivitySelectionBias():
    # call the BaseXLearner class and return the sensitivity analysis summary report
learner = BaseXLearner(LinearRegression())
- sens = SensitivitySelectionBias(df, INFERENCE_FEATURES, p_col=SCORE_COL, treatment_col=TREATMENT_COL,
- outcome_col=OUTCOME_COL, learner=learner, confound='alignment', alpha_range=None)
+ sens = SensitivitySelectionBias(
+ df,
+ INFERENCE_FEATURES,
+ p_col=SCORE_COL,
+ treatment_col=TREATMENT_COL,
+ outcome_col=OUTCOME_COL,
+ learner=learner,
+ confound="alignment",
+ alpha_range=None,
+ )
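+    # causalsens() returns the sensitivity estimates and the partial R-squared values under the alignment confounding function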
lls_bias_alignment, partial_rsqs_bias_alignment = sens.causalsens()
print(lls_bias_alignment, partial_rsqs_bias_alignment)
@@ -119,7 +177,9 @@ def test_SensitivitySelectionBias():
def test_one_sided():
- y, X, treatment, tau, b, e = synthetic_data(mode=1, n=100000, p=NUM_FEATURES, sigma=1.0)
+ y, X, treatment, tau, b, e = synthetic_data(
+ mode=1, n=100000, p=NUM_FEATURES, sigma=1.0
+ )
alpha = np.quantile(y, 0.25)
adj = one_sided(alpha, e, treatment)
@@ -127,7 +187,9 @@ def test_one_sided():
def test_alignment():
- y, X, treatment, tau, b, e = synthetic_data(mode=1, n=100000, p=NUM_FEATURES, sigma=1.0)
+ y, X, treatment, tau, b, e = synthetic_data(
+ mode=1, n=100000, p=NUM_FEATURES, sigma=1.0
+ )
alpha = np.quantile(y, 0.25)
adj = alignment(alpha, e, treatment)
@@ -135,7 +197,9 @@ def test_alignment():
def test_one_sided_att():
- y, X, treatment, tau, b, e = synthetic_data(mode=1, n=100000, p=NUM_FEATURES, sigma=1.0)
+ y, X, treatment, tau, b, e = synthetic_data(
+ mode=1, n=100000, p=NUM_FEATURES, sigma=1.0
+ )
alpha = np.quantile(y, 0.25)
adj = one_sided_att(alpha, e, treatment)
@@ -143,7 +207,9 @@ def test_one_sided_att():
def test_alignment_att():
- y, X, treatment, tau, b, e = synthetic_data(mode=1, n=100000, p=NUM_FEATURES, sigma=1.0)
+ y, X, treatment, tau, b, e = synthetic_data(
+ mode=1, n=100000, p=NUM_FEATURES, sigma=1.0
+ )
alpha = np.quantile(y, 0.25)
adj = alignment_att(alpha, e, treatment)
diff --git a/tests/test_uplift_trees.py b/tests/test_uplift_trees.py
index af393e08..652562ee 100644
--- a/tests/test_uplift_trees.py
+++ b/tests/test_uplift_trees.py
@@ -17,13 +17,13 @@ def test_make_uplift_classification(generate_classification_data):
assert df.shape[0] == N_SAMPLE * len(TREATMENT_NAMES)
-@pytest.mark.parametrize("backend", ['loky', 'threading', 'multiprocessing'])
-@pytest.mark.parametrize("joblib_prefer", ['threads', 'processes'])
-def test_UpliftRandomForestClassifier(generate_classification_data, backend, joblib_prefer):
+@pytest.mark.parametrize("backend", ["loky", "threading", "multiprocessing"])
+@pytest.mark.parametrize("joblib_prefer", ["threads", "processes"])
+def test_UpliftRandomForestClassifier(
+ generate_classification_data, backend, joblib_prefer
+):
df, x_names = generate_classification_data()
- df_train, df_test = train_test_split(df,
- test_size=0.2,
- random_state=RANDOM_SEED)
+ df_train, df_test = train_test_split(df, test_size=0.2, random_state=RANDOM_SEED)
with parallel_backend(backend):
# Train the UpLift Random Forest classifier
@@ -31,12 +31,14 @@ def test_UpliftRandomForestClassifier(generate_classification_data, backend, job
min_samples_leaf=50,
control_name=TREATMENT_NAMES[0],
random_state=RANDOM_SEED,
- joblib_prefer=joblib_prefer
+ joblib_prefer=joblib_prefer,
)
- uplift_model.fit(df_train[x_names].values,
- treatment=df_train['treatment_group_key'].values,
- y=df_train[CONVERSION].values)
+ uplift_model.fit(
+ df_train[x_names].values,
+ treatment=df_train["treatment_group_key"].values,
+ y=df_train[CONVERSION].values,
+ )
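+    # collect predictions from a single default call and from explicit threading/multiprocessing backends with 2 jobs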
predictions = {}
predictions["single"] = uplift_model.predict(df_test[x_names].values)
@@ -45,7 +47,9 @@ def test_UpliftRandomForestClassifier(generate_classification_data, backend, job
with parallel_backend("threading", n_jobs=2):
predictions["threading_2"] = uplift_model.predict(df_test[x_names].values)
with parallel_backend("multiprocessing", n_jobs=2):
- predictions["multiprocessing_2"] = uplift_model.predict(df_test[x_names].values)
+ predictions["multiprocessing_2"] = uplift_model.predict(
+ df_test[x_names].values
+ )
# assert that the predictions coincide for single and all parallel computations
iterator = iter(predictions.values())
@@ -55,19 +59,19 @@ def test_UpliftRandomForestClassifier(generate_classification_data, backend, job
y_pred = list(predictions.values())[0]
result = pd.DataFrame(y_pred, columns=uplift_model.classes_[1:])
- best_treatment = np.where((result < 0).all(axis=1),
- CONTROL_NAME,
- result.idxmax(axis=1))
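+    # recommend the treatment with the largest predicted uplift, falling back to control when all uplifts are negative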
+ best_treatment = np.where(
+ (result < 0).all(axis=1), CONTROL_NAME, result.idxmax(axis=1)
+ )
# Create a synthetic population:
# Create indicator variables for whether a unit happened to have the
# recommended treatment or was in the control group
actual_is_best = np.where(
- df_test['treatment_group_key'] == best_treatment, 1, 0
+ df_test["treatment_group_key"] == best_treatment, 1, 0
)
actual_is_control = np.where(
- df_test['treatment_group_key'] == CONTROL_NAME, 1, 0
+ df_test["treatment_group_key"] == CONTROL_NAME, 1, 0
)
synthetic = (actual_is_best == 1) | (actual_is_control == 1)
@@ -76,56 +80,54 @@ def test_UpliftRandomForestClassifier(generate_classification_data, backend, job
auuc_metrics = synth.assign(
is_treated=1 - actual_is_control[synthetic],
conversion=df_test.loc[synthetic, CONVERSION].values,
- uplift_tree=synth.max(axis=1)
+ uplift_tree=synth.max(axis=1),
).drop(columns=list(uplift_model.classes_[1:]))
- cumgain = get_cumgain(auuc_metrics,
- outcome_col=CONVERSION,
- treatment_col='is_treated')
+ cumgain = get_cumgain(
+ auuc_metrics, outcome_col=CONVERSION, treatment_col="is_treated"
+ )
# Check if the cumulative gain of UpLift Random Forest is higher than
# random
- assert cumgain['uplift_tree'].sum() > cumgain['Random'].sum()
+ assert cumgain["uplift_tree"].sum() > cumgain["Random"].sum()
def test_UpliftTreeClassifier(generate_classification_data):
df, x_names = generate_classification_data()
- df_train, df_test = train_test_split(df,
- test_size=0.2,
- random_state=RANDOM_SEED)
+ df_train, df_test = train_test_split(df, test_size=0.2, random_state=RANDOM_SEED)
    # Train the uplift tree classifier
- uplift_model = UpliftTreeClassifier(control_name=TREATMENT_NAMES[0], random_state=RANDOM_SEED)
+ uplift_model = UpliftTreeClassifier(
+ control_name=TREATMENT_NAMES[0], random_state=RANDOM_SEED
+ )
- pr = cProfile.Profile(subcalls=True, builtins=True, timeunit=.001)
+ pr = cProfile.Profile(subcalls=True, builtins=True, timeunit=0.001)
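+    # profile model fitting and prediction; the stats are written to UpliftTreeClassifier.prof below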
pr.enable()
- uplift_model.fit(df_train[x_names].values,
- treatment=df_train['treatment_group_key'].values,
- y=df_train[CONVERSION].values)
+ uplift_model.fit(
+ df_train[x_names].values,
+ treatment=df_train["treatment_group_key"].values,
+ y=df_train[CONVERSION].values,
+ )
y_pred = uplift_model.predict(df_test[x_names].values)
pr.disable()
- with open('UpliftTreeClassifier.prof', 'w') as f:
- ps = pstats.Stats(pr, stream=f).sort_stats('cumulative')
+ with open("UpliftTreeClassifier.prof", "w") as f:
+ ps = pstats.Stats(pr, stream=f).sort_stats("cumulative")
ps.print_stats()
result = pd.DataFrame(y_pred, columns=uplift_model.classes_)
result.drop(CONTROL_NAME, axis=1, inplace=True)
- best_treatment = np.where((result < 0).all(axis=1),
- CONTROL_NAME,
- result.idxmax(axis=1))
+ best_treatment = np.where(
+ (result < 0).all(axis=1), CONTROL_NAME, result.idxmax(axis=1)
+ )
# Create a synthetic population:
# Create indicator variables for whether a unit happened to have the
# recommended treatment or was in the control group
- actual_is_best = np.where(
- df_test['treatment_group_key'] == best_treatment, 1, 0
- )
- actual_is_control = np.where(
- df_test['treatment_group_key'] == CONTROL_NAME, 1, 0
- )
+ actual_is_best = np.where(df_test["treatment_group_key"] == best_treatment, 1, 0)
+ actual_is_control = np.where(df_test["treatment_group_key"] == CONTROL_NAME, 1, 0)
synthetic = (actual_is_best == 1) | (actual_is_control == 1)
synth = result[synthetic]
@@ -133,16 +135,16 @@ def test_UpliftTreeClassifier(generate_classification_data):
auuc_metrics = synth.assign(
is_treated=1 - actual_is_control[synthetic],
conversion=df_test.loc[synthetic, CONVERSION].values,
- uplift_tree=synth.max(axis=1)
+ uplift_tree=synth.max(axis=1),
).drop(columns=result.columns)
- cumgain = get_cumgain(auuc_metrics,
- outcome_col=CONVERSION,
- treatment_col='is_treated')
+ cumgain = get_cumgain(
+ auuc_metrics, outcome_col=CONVERSION, treatment_col="is_treated"
+ )
    # Check if the cumulative gain of the uplift tree is higher than
    # random
- assert cumgain['uplift_tree'].sum() > cumgain['Random'].sum()
+ assert cumgain["uplift_tree"].sum() > cumgain["Random"].sum()
    # Check if the total count is split correctly, at least for the control group in the first level
def validate_cnt(cur_tree):
@@ -157,41 +159,45 @@ def validate_cnt(cur_tree):
return [parent_control_cnt, next_level_control_cnt]
counts = validate_cnt(uplift_model.fitted_uplift_tree)
- assert (counts[0] > 0 and counts[0] == counts[1])
+ assert counts[0] > 0 and counts[0] == counts[1]
# Check if it works as expected after filling with validation data
- uplift_model.fill(df_test[x_names].values,
- treatment=df_test['treatment_group_key'].values,
- y=df_test[CONVERSION].values)
+ uplift_model.fill(
+ df_test[x_names].values,
+ treatment=df_test["treatment_group_key"].values,
+ y=df_test[CONVERSION].values,
+ )
counts = validate_cnt(uplift_model.fitted_uplift_tree)
- assert (counts[0] > 0 and counts[0] == counts[1])
+ assert counts[0] > 0 and counts[0] == counts[1]
def test_UpliftTreeClassifier_feature_importance(generate_classification_data):
# test if feature importance is working as expected
df, x_names = generate_classification_data()
- df_train, df_test = train_test_split(df,
- test_size=0.2,
- random_state=RANDOM_SEED)
+ df_train, df_test = train_test_split(df, test_size=0.2, random_state=RANDOM_SEED)
    # Train the uplift tree classifier
- uplift_model = UpliftTreeClassifier(control_name=TREATMENT_NAMES[0], random_state=RANDOM_SEED)
- uplift_model.fit(df_train[x_names].values,
- treatment=df_train['treatment_group_key'].values,
- y=df_train[CONVERSION].values)
+ uplift_model = UpliftTreeClassifier(
+ control_name=TREATMENT_NAMES[0], random_state=RANDOM_SEED
+ )
+ uplift_model.fit(
+ df_train[x_names].values,
+ treatment=df_train["treatment_group_key"].values,
+ y=df_train[CONVERSION].values,
+ )
- assert hasattr(uplift_model, 'feature_importances_')
- assert (np.all(uplift_model.feature_importances_ >= 0))
- num_non_zero_imp_features = sum([1 if imp > 0 else 0 for imp in uplift_model.feature_importances_])
+ assert hasattr(uplift_model, "feature_importances_")
+ assert np.all(uplift_model.feature_importances_ >= 0)
+ num_non_zero_imp_features = sum(
+ [1 if imp > 0 else 0 for imp in uplift_model.feature_importances_]
+ )
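+    # helper that counts the internal (non-leaf) nodes of the fitted uplift tree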
def getNonleafCount(node):
# base case
- if (node is None or (node.trueBranch is None and
- node.falseBranch is None)):
+ if node is None or (node.trueBranch is None and node.falseBranch is None):
return 0
        # If the node is not None and at least one of its children is not None
- return (1 + getNonleafCount(node.trueBranch) +
- getNonleafCount(node.falseBranch))
+ return 1 + getNonleafCount(node.trueBranch) + getNonleafCount(node.falseBranch)
num_non_leaf_nodes = getNonleafCount(uplift_model.fitted_uplift_tree)
    # Check that the number of features with positive importance does not exceed the number of non-leaf nodes
diff --git a/tests/test_value_optimization.py b/tests/test_value_optimization.py
index 00bc756d..8c504d5e 100644
--- a/tests/test_value_optimization.py
+++ b/tests/test_value_optimization.py
@@ -17,53 +17,64 @@
def test_counterfactual_value_optimization():
df, X_names = make_uplift_classification(
- n_samples=2000, treatment_name=['control', 'treatment1', 'treatment2'])
+ n_samples=2000, treatment_name=["control", "treatment1", "treatment2"]
+ )
df_train, df_test = train_test_split(df, test_size=0.2, random_state=RANDOM_SEED)
train_idx = df_train.index
test_idx = df_test.index
- conversion_cost_dict = {'control': 0, 'treatment1': 2.5, 'treatment2': 5}
- impression_cost_dict = {'control': 0, 'treatment1': 0, 'treatment2': 0.02}
+ conversion_cost_dict = {"control": 0, "treatment1": 2.5, "treatment2": 5}
+ impression_cost_dict = {"control": 0, "treatment1": 0, "treatment2": 0.02}
- cc_array, ic_array, conditions = get_treatment_costs(treatment=df['treatment_group_key'],
- control_name='control',
- cc_dict=conversion_cost_dict,
- ic_dict=impression_cost_dict)
+ cc_array, ic_array, conditions = get_treatment_costs(
+ treatment=df["treatment_group_key"],
+ control_name="control",
+ cc_dict=conversion_cost_dict,
+ ic_dict=impression_cost_dict,
+ )
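+    # assume a flat conversion value of 20 for every unit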
conversion_value_array = np.full(df.shape[0], 20)
- actual_value = get_actual_value(treatment=df['treatment_group_key'],
- observed_outcome=df['conversion'],
- conversion_value=conversion_value_array,
- conditions=conditions,
- conversion_cost=cc_array,
- impression_cost=ic_array)
+ actual_value = get_actual_value(
+ treatment=df["treatment_group_key"],
+ observed_outcome=df["conversion"],
+ conversion_value=conversion_value_array,
+ conditions=conditions,
+ conversion_cost=cc_array,
+ impression_cost=ic_array,
+ )
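+    # the mean realized value on the test set under the experiment's random assignment serves as the baseline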
random_allocation_value = actual_value.loc[test_idx].mean()
- tm = BaseTClassifier(learner=LogisticRegression(), control_name='control')
- tm.fit(df_train[X_names].values, df_train['treatment_group_key'], df_train['conversion'])
+ tm = BaseTClassifier(learner=LogisticRegression(), control_name="control")
+ tm.fit(
+ df_train[X_names].values,
+ df_train["treatment_group_key"],
+ df_train["conversion"],
+ )
tm_pred = tm.predict(df_test[X_names].values)
proba_model = LogisticRegression()
- W_dummies = pd.get_dummies(df['treatment_group_key'])
+ W_dummies = pd.get_dummies(df["treatment_group_key"])
XW = np.c_[df[X_names], W_dummies]
- proba_model.fit(XW[train_idx], df_train['conversion'])
+ proba_model.fit(XW[train_idx], df_train["conversion"])
y_proba = proba_model.predict_proba(XW[test_idx])[:, 1]
- cve = CounterfactualValueEstimator(treatment=df_test['treatment_group_key'],
- control_name='control',
- treatment_names=conditions[1:],
- y_proba=y_proba,
- cate=tm_pred,
- value=conversion_value_array[test_idx],
- conversion_cost=cc_array[test_idx],
- impression_cost=ic_array[test_idx])
+ cve = CounterfactualValueEstimator(
+ treatment=df_test["treatment_group_key"],
+ control_name="control",
+ treatment_names=conditions[1:],
+ y_proba=y_proba,
+ cate=tm_pred,
+ value=conversion_value_array[test_idx],
+ conversion_cost=cc_array[test_idx],
+ impression_cost=ic_array[test_idx],
+ )
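+    # predict_best() returns, for each unit, the index (into conditions) of the recommended treatment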
cve_best_idx = cve.predict_best()
cve_best = [conditions[idx] for idx in cve_best_idx]
- actual_is_cve_best = df.loc[test_idx, 'treatment_group_key'] == cve_best
+ actual_is_cve_best = df.loc[test_idx, "treatment_group_key"] == cve_best
cve_value = actual_value.loc[test_idx][actual_is_cve_best].mean()
assert cve_value > random_allocation_value