diff --git a/docs/source/content/examples/data_manaer.ipynb b/docs/source/content/examples/data_manaer.ipynb
new file mode 100644
index 0000000..bf02536
--- /dev/null
+++ b/docs/source/content/examples/data_manaer.ipynb
@@ -0,0 +1,239 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Data Manager\n",
+    "When doing active learning we have our Original Data (OD), Labeled Data (LD) and Unlabeled Data (UD),\n",
+    "where UD and LD are subsets of OD.\n",
+    "The active learner operates on UD and returns indices relative to it. We want to store those indices with respect\n",
+    "to OD, and sometimes see the subset of labels of LD. (The subset of labels of UD is empty.)\n",
+    "\n",
+    "That's a fancy way of saying there is a lot of bookkeeping to be done, and this class solves that by doing it for you.\n",
+    "\n",
+    "The main idea is that we store a mask (labeled_mask) of the indices that have been labeled and then expose UD, LD\n",
+    "and the labels by fancy indexing with that mask. The manager exposes an add_labels method which lets the\n",
+    "user add labels indexed with respect to UD, and it will adjust the indices so that they match OD.\n"
+   ]
+  },
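+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Before diving in, here is a minimal sketch of the bookkeeping idea in plain NumPy (no modAL needed). The names below (`od`, `labeled_mask`) are made up for the illustration.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "\n",
+    "od = np.array([10.0, 11.0, 12.0, 13.0, 14.0])  # Original Data (OD)\n",
+    "labeled_mask = np.zeros(od.shape[0], dtype=bool)\n",
+    "\n",
+    "labeled_mask[[1, 3]] = True  # pretend we labeled OD[1] and OD[3]\n",
+    "ld = od[labeled_mask]   # Labeled Data (LD) -> [11., 13.]\n",
+    "ud = od[~labeled_mask]  # Unlabeled Data (UD) -> [10., 12., 14.]\n",
+    "\n",
+    "# An index relative to UD maps back to OD via the positions where the mask is False:\n",
+    "unlabeled_indices = np.flatnonzero(~labeled_mask)\n",
+    "print(unlabeled_indices[1])  # UD index 1 is OD index 2\n"
+   ]
+  },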
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Preparation\n",
+    "In this part we prepare the data and learners, all normal stuff you've seen in other examples.\n",
+    "One difference is that we're working with text data.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\"\"\"\n",
+    "This example shows how to use the new data manager class.\n",
+    "For clarity, all the setup has been moved into functions and\n",
+    "the core is in the final section, which is commented.\n",
+    "\n",
+    "Also look at prepare_manager to see how a DataManager is instantiated.\n",
+    "\"\"\"\n",
+    "\n",
+    "from functools import partial\n",
+    "\n",
+    "import matplotlib as mpl\n",
+    "import matplotlib.pyplot as plt\n",
+    "import numpy as np\n",
+    "from sklearn.datasets import fetch_20newsgroups\n",
+    "from sklearn.ensemble import RandomForestClassifier\n",
+    "from sklearn.feature_extraction.text import TfidfVectorizer\n",
+    "\n",
+    "from modAL.batch import uncertainty_batch_sampling\n",
+    "from modAL.datamanager import DataManager\n",
+    "from modAL.models import ActiveLearner\n",
+    "\n",
+    "RANDOM_STATE_SEED = 123\n",
+    "np.random.seed(RANDOM_STATE_SEED)\n",
+    "BATCH_SIZE = 5\n",
+    "N_QUERIES = 50\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Define Utility Functions"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def prepare_data():\n",
+    "    SKIP_SIZE = 50  # Skip to make the example go fast\n",
+    "    docs, original_labels = fetch_20newsgroups(return_X_y=True)\n",
+    "    docs_train = docs[::SKIP_SIZE]\n",
+    "    original_labels_train = original_labels[::SKIP_SIZE]\n",
+    "    docs_test = docs[1::SKIP_SIZE]  # Offset by one means no overlap\n",
+    "    original_labels_test = original_labels[\n",
+    "        1::SKIP_SIZE\n",
+    "    ]  # Offset by one means no overlap\n",
+    "    return docs_train, original_labels_train, docs_test, original_labels_test\n",
+    "\n",
+    "\n",
+    "def prepare_features(docs_train, docs_test):\n",
+    "    vectorizer = TfidfVectorizer(\n",
+    "        stop_words=\"english\", ngram_range=(1, 3), max_df=0.9, max_features=5000\n",
+    "    )\n",
+    "    vectors_train = vectorizer.fit_transform(docs_train).toarray()\n",
+    "    vectors_test = vectorizer.transform(docs_test).toarray()\n",
+    "    return vectors_train, vectors_test\n",
+    "\n",
+    "\n",
+    "def prepare_learner():\n",
+    "    estimator = RandomForestClassifier()\n",
+    "    preset_batch = partial(uncertainty_batch_sampling, n_instances=BATCH_SIZE)\n",
+    "    learner = ActiveLearner(estimator=estimator, query_strategy=preset_batch)\n",
+    "    return learner\n",
+    "\n",
+    "\n",
+    "def make_pretty_summary_plot(performance_history):\n",
+    "    with plt.style.context(\"seaborn-white\"):\n",
+    "        fig, ax = plt.subplots(figsize=(8.5, 6), dpi=130)\n",
+    "\n",
+    "        ax.plot(performance_history)\n",
+    "        ax.scatter(range(len(performance_history)), performance_history, s=13)\n",
+    "\n",
+    "        ax.xaxis.set_major_locator(\n",
+    "            mpl.ticker.MaxNLocator(nbins=N_QUERIES + 3, integer=True)\n",
+    "        )\n",
+    "        ax.xaxis.grid(True)\n",
+    "\n",
+    "        ax.yaxis.set_major_locator(mpl.ticker.MaxNLocator(nbins=10))\n",
+    "        ax.yaxis.set_major_formatter(mpl.ticker.PercentFormatter(xmax=1))\n",
+    "        ax.set_ylim(bottom=0, top=1)\n",
+    "        ax.yaxis.grid(True, linestyle=\"--\", alpha=1 / 2)\n",
+    "\n",
+    "        ax.set_title(\"Incremental classification accuracy\")\n",
+    "        ax.set_xlabel(\"Query iteration\")\n",
+    "        ax.set_ylabel(\"Classification Accuracy\")\n",
+    "\n",
+    "        plt.show()\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Instantiate The Data Manager\n",
+    "Here we instantiate the manager. We pass it the feature vectors we'll be training on, as well as the original documents (so we can easily index them).\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def prepare_manager(vectors_train, docs_train):\n",
+    "    manager = DataManager(vectors_train, sources=docs_train)\n",
+    "    return manager\n"
+   ]
+  },
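+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Before running the full loop, here is a quick, illustrative check of the manager on a toy array (the values below are made up for the demo): `add_labels` takes indices relative to UD and the manager translates them to OD.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "toy = DataManager(np.arange(12).reshape(6, 2), sources=list(\"abcdef\"))\n",
+    "toy.add_labels((0, 1))  # label UD index 0, which right now is also OD index 0\n",
+    "# After that add, UD index 0 points at OD index 1:\n",
+    "print(toy.get_original_index_from_unlabeled_index(0))\n",
+    "toy.add_labels([(0, 0), (2, 1)])  # these UD indices map to OD indices 1 and 3\n",
+    "print(toy.labels)  # labels of LD, stored as float64 by default (no labels_dtype given)\n",
+    "print(toy.remaining_sources)  # sources that are still unlabeled\n"
+   ]
+  },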
\n", + " The manager makes this almost transparent\n", + " \"\"\"\n", + " '''\n", + " Map the index that is with respect to unlabeled data back to an index with respect to the \n", + " whole dataset\n", + " '''\n", + " original_ix = manager.get_original_index_from_unlabeled_index(ix)\n", + " #print(manager.sources[original_ix]) #Show the original data so we can decide what to label\n", + " # Now we can lookup the label in the original set of labels without any bookkeeping\n", + " y = original_labels_train[original_ix]\n", + " # We create a Label instance, a tuple of index and label\n", + " # The index should be with respect to the unlabeled data, the add_labels function will automatically\n", + " # calculate the offsets\n", + " label = (ix, y)\n", + " # append the labels to a list\n", + " labels.append(label)\n", + " # Insert them all at once.\n", + " manager.add_labels(labels)\n", + " # Note that if you need to add labels with indicies that repsect the original dataset you can do\n", + " # manager.add_labels(labels,offset_to_unlabeled=False)\n", + " # Now teach as usual\n", + " learner.teach(manager.labeled, manager.labels)\n", + " performance_history.append(learner.score(vectors_test, original_labels_test))\n", + "# Finnaly make a nice plot\n", + "make_pretty_summary_plot(performance_history)\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.9" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/examples/data_manager_and_text_classification.py b/examples/data_manager_and_text_classification.py new file mode 100644 index 0000000..bb9b089 --- /dev/null +++ b/examples/data_manager_and_text_classification.py @@ -0,0 +1,130 @@ +""" +This example shows how to use the new data manager class. +For clarity, all the setup has been moved into functions and +the core is in the __main__ section which is commented + +Also look at prepare_manager to see how a DataManager is instantiated + +""" + +from sklearn.datasets import fetch_20newsgroups +from sklearn.ensemble import RandomForestClassifier +from modAL.datamanager import DataManager +import numpy as np +import matplotlib as mpl +import matplotlib.pyplot as plt +from sklearn.feature_extraction.text import TfidfVectorizer +from functools import partial + + +from modAL.models import ActiveLearner +from modAL.batch import uncertainty_batch_sampling + +RANDOM_STATE_SEED = 123 +np.random.seed(RANDOM_STATE_SEED) +BATCH_SIZE = 5 +N_QUERIES = 50 + + +def prepare_data(): + SKIP_SIZE = 50 # Skip to make the example go fast. 
+
+from functools import partial
+
+import matplotlib as mpl
+import matplotlib.pyplot as plt
+import numpy as np
+from sklearn.datasets import fetch_20newsgroups
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.feature_extraction.text import TfidfVectorizer
+
+from modAL.batch import uncertainty_batch_sampling
+from modAL.datamanager import DataManager
+from modAL.models import ActiveLearner
+
+RANDOM_STATE_SEED = 123
+np.random.seed(RANDOM_STATE_SEED)
+BATCH_SIZE = 5
+N_QUERIES = 50
+
+
+def prepare_data():
+    SKIP_SIZE = 50  # Skip to make the example go fast
+    docs, original_labels = fetch_20newsgroups(return_X_y=True)
+    docs_train = docs[::SKIP_SIZE]
+    original_labels_train = original_labels[::SKIP_SIZE]
+    docs_test = docs[1::SKIP_SIZE]  # Offset by one means no overlap
+    original_labels_test = original_labels[
+        1::SKIP_SIZE
+    ]  # Offset by one means no overlap
+    return docs_train, original_labels_train, docs_test, original_labels_test
+
+
+def prepare_features(docs_train, docs_test):
+    vectorizer = TfidfVectorizer(
+        stop_words="english", ngram_range=(1, 3), max_df=0.9, max_features=5000
+    )
+    vectors_train = vectorizer.fit_transform(docs_train).toarray()
+    vectors_test = vectorizer.transform(docs_test).toarray()
+    return vectors_train, vectors_test
+
+
+def prepare_manager(vectors_train, docs_train):
+    manager = DataManager(vectors_train, sources=docs_train)
+    return manager
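+
+
+# An aside (not part of the original example): DataManager allocates its internal
+# label store as np.empty(n, dtype=labels_dtype), which defaults to float64. If you
+# want integer class labels stored as ints, you could pass labels_dtype here, e.g.
+#     manager = DataManager(vectors_train, labels_dtype=int, sources=docs_train)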
+
+
+def prepare_learner():
+    estimator = RandomForestClassifier()
+    preset_batch = partial(uncertainty_batch_sampling, n_instances=BATCH_SIZE)
+    learner = ActiveLearner(estimator=estimator, query_strategy=preset_batch)
+    return learner
+
+
+def make_pretty_summary_plot(performance_history):
+    with plt.style.context("seaborn-white"):
+        fig, ax = plt.subplots(figsize=(8.5, 6), dpi=130)
+
+        ax.plot(performance_history)
+        ax.scatter(range(len(performance_history)), performance_history, s=13)
+
+        ax.xaxis.set_major_locator(
+            mpl.ticker.MaxNLocator(nbins=N_QUERIES + 3, integer=True)
+        )
+        ax.xaxis.grid(True)
+
+        ax.yaxis.set_major_locator(mpl.ticker.MaxNLocator(nbins=10))
+        ax.yaxis.set_major_formatter(mpl.ticker.PercentFormatter(xmax=1))
+        ax.set_ylim(bottom=0, top=1)
+        ax.yaxis.grid(True, linestyle="--", alpha=1 / 2)
+
+        ax.set_title("Incremental classification accuracy")
+        ax.set_xlabel("Query iteration")
+        ax.set_ylabel("Classification Accuracy")
+
+        plt.show()
+
+
+if __name__ == "__main__":
+    docs_train, original_labels_train, docs_test, original_labels_test = prepare_data()
+    vectors_train, vectors_test = prepare_features(docs_train, docs_test)
+    manager = prepare_manager(vectors_train, docs_train)
+    learner = prepare_learner()
+    performance_history = []
+    # performance_history.append(learner.score(vectors_test, original_labels_test))
+
+    for i in range(N_QUERIES):
+        # Check if there are examples that are not labeled yet. If not, we're done
+        if manager.unlabeled.size == 0:
+            break
+
+        # Query the learner as usual. Since we are using a batch sampling strategy,
+        # indices_to_label is an array
+        indices_to_label, query_instance = learner.query(manager.unlabeled)
+        labels = []  # Hold a list of the new labels
+        for ix in indices_to_label:
+            # Here is the tricky part that the manager solves: the indices are relative
+            # to the unlabeled data, but we want to work with them with respect to the
+            # original data. The manager makes this almost transparent.
+            # Map the index that is relative to the unlabeled data back to an index
+            # relative to the whole dataset
+            original_ix = manager.get_original_index_from_unlabeled_index(ix)
+            # print(manager.sources[original_ix])  # Show the original data so we can decide what to label
+            # Now we can look up the label in the original set of labels without any bookkeeping
+            y = original_labels_train[original_ix]
+            # We create a Label instance, a tuple of (index, label).
+            # The index should be relative to the unlabeled data; add_labels will
+            # calculate the offsets automatically
+            label = (ix, y)
+            labels.append(label)
+        # Insert them all at once
+        manager.add_labels(labels)
+        # Note: if you need to add labels with indices relative to the original dataset, use
+        # manager.add_labels(labels, offset_to_unlabeled=False)
+        # Now teach as usual
+        learner.teach(manager.labeled, manager.labels)
+        performance_history.append(learner.score(vectors_test, original_labels_test))
+    # Finally, make a nice plot
+    make_pretty_summary_plot(performance_history)
diff --git a/modAL/datamanager.py b/modAL/datamanager.py
new file mode 100644
index 0000000..77b6cb3
--- /dev/null
+++ b/modAL/datamanager.py
@@ -0,0 +1,136 @@
+from typing import Any, List, Optional, Tuple, Union
+
+import numpy as np
+
+# A label is a tuple of (index, label value)
+Label = Tuple[int, Any]
+LabelList = List[Label]
+Sources = List[Any]
+
+
+class DataManager:
+    def __init__(
+        self,
+        features: np.ndarray,
+        labels_dtype: Optional[np.dtype] = None,
+        sources: Optional[Sources] = None,
+    ):
+        """
+        When doing active learning we have our Original Data (OD), Labeled Data (LD) and Unlabeled Data (UD),
+        where UD and LD are subsets of OD.
+        The active learner operates on UD and returns indices relative to it. We want to store those indices
+        with respect to OD, and sometimes see the subset of labels of LD. (The subset of labels of UD is empty.)
+
+        That's a fancy way of saying there is a lot of bookkeeping to be done, and this class solves that by
+        doing it for you.
+
+        The main idea is that we store a mask (labeled_mask) of the indices that have been labeled and then
+        expose UD, LD and the labels by fancy indexing with that mask. The manager exposes an add_labels
+        method which lets the user add labels indexed with respect to UD, and it will adjust the indices so
+        that they match OD.
+
+        :param features: An array of the features that will be used for AL.
+        :param labels_dtype: The dtype of the stored labels (float64 if not given).
+        :param sources: A list of the original data.
+        """
+        self.features = features
+
+        self._labels = np.empty(shape=self.features.shape[0], dtype=labels_dtype)
+        self.labeled_mask = np.zeros(self.features.shape[0], dtype=bool)
+        self.sources = np.array(sources if sources is not None else [])
+
+    @property
+    def labels(self):
+        """
+        :return: The labels, indexed with respect to LD.
+        """
+        return self._labels[self.labeled_mask]
+
+    @property
+    def unlabeled_mask(self):
+        """
+        :return: A mask which is True for all unlabeled points.
+        """
+        return np.logical_not(self.labeled_mask)
+
+    def _update_masks(self, labels: LabelList):
+        for label in labels:
+            self.labeled_mask[label[0]] = True
+
+    def _offset_new_labels(self, labels: LabelList) -> LabelList:
+        """
+        This is where the magic happens.
+        We take self.unlabeled_mask.nonzero()[0], which gives us an array of the indices that appear in the
+        unlabeled data. So if a new label arrived at position 0, we look up its "real" index in the
+        unlabeled_indices array to get its true position with respect to the original data.

+        :param labels: The labels, with indices relative to UD.
+        :return: The labels, with indices relative to OD.
+        """
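+        # Worked example (illustrative): if labeled_mask = [True, False, False, True, False],
+        # then unlabeled_indices = [1, 2, 4], so an incoming label (1, y), i.e. UD index 1,
+        # is corrected to (2, y), its position in OD.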
+        if not self.labeled_mask.any():
+            # Nothing has been labeled yet, so UD and OD indices coincide
+            return labels
+        corrected_labels: LabelList = []
+        unlabeled_indices = self.unlabeled_mask.nonzero()[0]
+
+        for label in labels:
+            new_index = unlabeled_indices[label[0]]
+            new_label: Label = (new_index, label[1])
+            corrected_labels.append(new_label)
+        return corrected_labels
+
+    def add_labels(self, labels: Union[Label, LabelList], offset_to_unlabeled=True):
+        if isinstance(labels, tuple):  # A single (index, label) example
+            labels = [labels]
+        elif not isinstance(labels, list):
+            raise TypeError(
+                "Malformed input. Please add either a tuple (ix, label) or a list [(ix, label), ...]"
+            )
+        if offset_to_unlabeled:
+            labels = self._offset_new_labels(labels)
+        self._update_masks(labels)
+        for label in labels:
+            self._labels[label[0]] = label[1]
+
+    @property
+    def unlabeled(self):
+        """
+        :return: UD, all of the unlabeled data points.
+        """
+        return self.features[self.unlabeled_mask]
+
+    @property
+    def labeled(self):
+        """
+        :return: LD, all of the labeled data points.
+        """
+        return self.features[self.labeled_mask]
+
+    @property
+    def remaining_sources(self):
+        """
+        :return: The original data, as opposed to the features, with respect to UD.
+        """
+        return self.sources[self.unlabeled_mask]
+
+    def get_original_index_from_unlabeled_index(self, ixs: Union[int, List[int]]):
+        """
+        Utility function that takes indices relative to the unlabeled subset and returns the equivalent
+        indices relative to the complete array.
+        Useful for testing purposes, where we have the existing labels and want to take them in the order
+        in which the active learner specifies.
+
+        :param ixs: A single index, or a list of indices, relative to UD.
+        :return: A list of the equivalent indices relative to OD.
+        """
+        unlabeled_indices = self.unlabeled_mask.nonzero()[0]
+        if isinstance(ixs, (int, np.integer)):
+            ixs = [ixs]
+        return [unlabeled_indices[ix] for ix in ixs]
+
+
+__all__ = ["Label", "LabelList", "DataManager"]
diff --git a/tests/datamanager_tests.py b/tests/datamanager_tests.py
new file mode 100644
index 0000000..57b1e36
--- /dev/null
+++ b/tests/datamanager_tests.py
@@ -0,0 +1,78 @@
+import unittest
+
+import numpy as np
+
+from modAL.datamanager import DataManager
+
+
+def first_true(ar: np.ndarray):
+    """Return the index of the first True value in a boolean array."""
+    return ar.nonzero()[0][0]
+
+
+class TestAddLabels(unittest.TestCase):
+    def test_that_when_the_first_add_is_at_0_it_updates_correctly(self):
+        features = np.array([[x + y for x in range(10)] for y in range(10)])
+        self.assertEqual(features.shape, (10, 10))
+        manager = DataManager(features=features)
+        manager.add_labels([(0, 1)])
+        self.assertEqual(first_true(manager.labeled_mask), 0)
+        # The index of the first unlabeled example is one past the first labeled one
+        self.assertEqual(first_true(manager.unlabeled_mask), 1)
+
+    def test_add_to_first_continuously(self):
+        features = np.array([[x + y for x in range(10)] for y in range(10)])
+        self.assertEqual(features.shape, (10, 10))
+        manager = DataManager(features=features)
+        manager.add_labels([(0, 1)])
+        self.assertEqual(first_true(manager.labeled_mask), 0)
+        self.assertEqual(first_true(manager.unlabeled_mask), 1)
+
+        manager.add_labels([(0, 1)])
+        self.assertEqual(first_true(manager.labeled_mask), 0)
+        self.assertEqual(first_true(manager.unlabeled_mask), 2)
+
+        manager.add_labels([(0, 1)])
+        self.assertEqual(first_true(manager.labeled_mask), 0)
+        self.assertEqual(first_true(manager.unlabeled_mask), 3)
+
+    def test_adding_in_the_middle(self):
+        features = np.array([[x + y for x in range(10)] for y in range(10)])
+        self.assertEqual(features.shape, (10, 10))
+        manager = DataManager(features=features)
+        manager.add_labels([(2, 1)])
+        self.assertEqual(first_true(manager.labeled_mask), 2)
+        self.assertEqual(first_true(manager.unlabeled_mask), 0)
+
+    def test_adding_two_in_the_middle(self):
+        features = np.array([[x + y for x in range(10)] for y in range(10)])
+        self.assertEqual(features.shape, (10, 10))
+        manager = DataManager(features=features)
+        manager.add_labels([(2, 1)])
+        self.assertEqual(first_true(manager.labeled_mask), 2)
+        self.assertEqual(first_true(manager.unlabeled_mask), 0)
+
+        manager.add_labels([(1, 1)])
+        self.assertEqual(first_true(manager.labeled_mask), 1)
+        # We still haven't labeled the one at 0
+        self.assertEqual(first_true(manager.unlabeled_mask), 0)
+
+    def test_adding_two_in_the_middle_and_then_at_0(self):
+        features = np.array([[x + y for x in range(10)] for y in range(10)])
+        self.assertEqual(features.shape, (10, 10))
+        manager = DataManager(features=features)
+        manager.add_labels([(2, 1)])
+        self.assertEqual(first_true(manager.labeled_mask), 2)
+        self.assertEqual(first_true(manager.unlabeled_mask), 0)
+
+        manager.add_labels([(1, 1)])
+        self.assertEqual(first_true(manager.labeled_mask), 1)
+        # We still haven't labeled the one at 0
+        self.assertEqual(first_true(manager.unlabeled_mask), 0)
+
+        manager.add_labels([(0, 1)])
+        self.assertEqual(first_true(manager.labeled_mask), 0)
+        # We labeled 0, 1 and 2, so the next unlabeled one should be 3
+        self.assertEqual(first_true(manager.unlabeled_mask), 3)
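+
+    def test_adding_with_indices_relative_to_the_original_data(self):
+        # An extra check, not in the original suite: with offset_to_unlabeled=False,
+        # add_labels treats indices as already relative to the original dataset
+        features = np.array([[x + y for x in range(10)] for y in range(10)])
+        manager = DataManager(features=features)
+        manager.add_labels([(2, 1)])
+        manager.add_labels([(5, 1)], offset_to_unlabeled=False)
+        self.assertTrue(manager.labeled_mask[5])
+        self.assertEqual(first_true(manager.unlabeled_mask), 0)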
+
+
+if __name__ == '__main__':
+    unittest.main()
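+
+# One way to run this suite (from the repository root; the exact invocation
+# depends on your setup):
+#     python -m unittest tests.datamanager_tests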