diff --git a/README.md b/README.md
index 59a1093d8..56c5a79f7 100644
--- a/README.md
+++ b/README.md
@@ -3,21 +3,23 @@
[![Documentation Status](https://readthedocs.org/projects/capreolus/badge/?version=latest)](https://capreolus.readthedocs.io/?badge=latest)
[![PyPI version fury.io](https://badge.fury.io/py/capreolus.svg)](https://pypi.python.org/pypi/capreolus/)
[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/ambv/black)
-[![Language grade: Python](https://img.shields.io/lgtm/grade/python/g/capreolus-ir/capreolus.svg?logo=lgtm&logoWidth=18)](https://lgtm.com/projects/g/capreolus-ir/capreolus/context:python)
# Capreolus
[![Capreolus](https://people.mpi-inf.mpg.de/~ayates/capreolus/capreolus-100px.png)](https://capreolus.ai)
Capreolus is a toolkit for conducting end-to-end ad hoc retrieval experiments. Capreolus provides fine control over the entire experimental pipeline through the use of interchangeable and configurable modules.
+[Get started with a Notebook](https://colab.research.google.com/drive/161FnmLt3PgIXG-Z5eNg45z2iSZucVAnr?usp=sharing) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/161FnmLt3PgIXG-Z5eNg45z2iSZucVAnr?usp=sharing)
+
[Read the documentation for a detailed overview.](http://capreolus.ai/)
## Quick Start
-1. Prerequisites: Python 3.6+ and Java 11
+1. Prerequisites: Python 3.7+ and Java 11. See the [installation instructions](https://capreolus.ai/en/latest/installation.html).
2. Install the pip package: `pip install capreolus`
-3. Train a model: `capreolus rerank.traineval with reranker.name=KNRM reranker.trainer.niters=2`
-4. If the `train` command completed successfully, you've trained your first Capreolus reranker on robust04! This command created several outputs, such as run files, a loss plot, and a ranking metric plot on the dev set queries. To learn about these files, [read about running experiments with Capreolus](http://capreolus.ai/en/latest/cli.html).
-5. To learn about different configuration options, try: `capreolus rerank.print_config with reranker.name=KNRM`
-5. To learn about different modules you can use, such as `reranker.name=DRMM`, try: `capreolus modules`
+3. Train a model: `capreolus rerank.traineval with benchmark.name=nf reranker.name=KNRM reranker.trainer.niters=2`
+4. If the `train` command completed successfully, you've trained your first Capreolus reranker on [NFCorpus](https://www.cl.uni-heidelberg.de/statnlpgroup/nfcorpus/)! This command created several outputs, such as model checkpoints and TREC-format run files. To learn about these files, [read about running experiments with Capreolus](http://capreolus.ai/en/latest/cli.html).
+5. To learn about different configuration options, try: `capreolus rerank.print_config with benchmark.name=nf reranker.name=KNRM`
+6. To learn about different modules you can use, such as `reranker.name=DRMM`, try: `capreolus modules`
+7. Learn about [running experiments via the Python API](https://capreolus.ai/en/latest/quick.html)
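
As a rough illustration of how the `with key=value` overrides in steps 3, 5, and 6 relate to Capreolus configuration, the sketch below builds the same nested config in Python. It uses only `config_list_to_dict`, which `capreolus/__init__.py` re-exports from profane (see the diff below); the exact shape of the returned dict is an assumption, not something stated in this repository.

```python
# Hedged sketch: we assume config_list_to_dict turns dotted key=value strings
# (the same strings passed after `with` on the CLI) into a nested config dict.
from capreolus import config_list_to_dict

overrides = ["benchmark.name=nf", "reranker.name=KNRM", "reranker.trainer.niters=2"]
config = config_list_to_dict(overrides)
print(config)
# assumed result, roughly:
# {"benchmark": {"name": "nf"}, "reranker": {"name": "KNRM", "trainer": {"niters": "2"}}}
```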
## Environment Variables
Capreolus uses environment variables to indicate where outputs should be stored and where document inputs can be found. Consult the table below to determine which variables should be set. Set them either on the fly before running Capreolus (`export CAPREOLUS_RESULTS=...`) or by editing your shell's initialization files (e.g., `~/.bashrc` or `~/.zshrc`).
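
For example, the results location can also be set from Python before Capreolus is used; `CAPREOLUS_RESULTS` is the variable named above, while the path below is just a placeholder.

```python
# Minimal sketch: point Capreolus at a custom results directory via an environment
# variable. Set it before Capreolus reads its configuration (importing it afterwards).
import os

os.environ["CAPREOLUS_RESULTS"] = "/path/to/large/disk/capreolus_results"  # placeholder path

import capreolus  # noqa: E402  (imported after setting the variable on purpose)
```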
diff --git a/capreolus/__init__.py b/capreolus/__init__.py
index c0873d86b..27e5909ab 100644
--- a/capreolus/__init__.py
+++ b/capreolus/__init__.py
@@ -5,7 +5,7 @@
from profane import ConfigOption, Dependency, ModuleBase, constants, config_list_to_dict, module_registry
-__version__ = "0.2.4.1"
+__version__ = "0.2.5"
### set constants used by capreolus and profane ###
diff --git a/capreolus/extractor/embedtext.py b/capreolus/extractor/embedtext.py
index 3f74fcc57..d9d5d5a8f 100644
--- a/capreolus/extractor/embedtext.py
+++ b/capreolus/extractor/embedtext.py
@@ -24,10 +24,10 @@ class EmbedText(Extractor):
Dependency(key="tokenizer", module="tokenizer", name="anserini"),
]
config_spec = [
- ConfigOption("embeddings", "glove6b"),
+ ConfigOption("embeddings", "glove6b", "embeddings to use: fasttext, glove6b, glove6b.50d, or w2vnews"),
ConfigOption("calcidf", True),
- ConfigOption("maxqlen", 4),
- ConfigOption("maxdoclen", 800),
+        ConfigOption("maxqlen", 4, "maximum query length (longer will be truncated)"),
+        ConfigOption("maxdoclen", 800, "maximum doc length (longer will be truncated)"),
]
pad_tok = ""
diff --git a/capreolus/extractor/slowembedtext.py b/capreolus/extractor/slowembedtext.py
index 99be0821e..a8f13a049 100644
--- a/capreolus/extractor/slowembedtext.py
+++ b/capreolus/extractor/slowembedtext.py
@@ -26,11 +26,11 @@ class SlowEmbedText(Extractor):
Dependency(key="tokenizer", module="tokenizer", name="anserini"),
]
config_spec = [
- ConfigOption("embeddings", "glove6b"),
- ConfigOption("zerounk", False),
+ ConfigOption("embeddings", "glove6b", "embeddings to use: fasttext, glove6b, glove6b.50d, or w2vnews"),
+ ConfigOption("zerounk", False, "use all zeros for unknown terms (True) or generate a random embedding (False)"),
ConfigOption("calcidf", True),
- ConfigOption("maxqlen", 4),
- ConfigOption("maxdoclen", 800),
+        ConfigOption("maxqlen", 4, "maximum query length (longer will be truncated)"),
+        ConfigOption("maxdoclen", 800, "maximum doc length (longer will be truncated)"),
ConfigOption("usecache", False),
]
diff --git a/capreolus/reranker/parade.py b/capreolus/reranker/parade.py
index b37db7670..781510bc9 100644
--- a/capreolus/reranker/parade.py
+++ b/capreolus/reranker/parade.py
@@ -116,7 +116,7 @@ class TFParade(Reranker):
ConfigOption(
"pretrained", "bert-base-uncased", "Pretrained model: bert-base-uncased, bert-base-msmarco, or electra-base-msmarco"
),
- ConfigOption("aggregation", "maxp"),
+ ConfigOption("aggregation", "transformer"),
]
def build_model(self):
diff --git a/docs/index.rst b/docs/index.rst
index 200e9883d..b7978ee24 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -2,6 +2,8 @@ Capreolus
=========================================
Capreolus is a toolkit for constructing flexible *ad hoc retrieval pipelines*. Capreolus pipelines can be run via a Python or command line interface.
+Want to jump in? `Get started with a Notebook. <https://colab.research.google.com/drive/161FnmLt3PgIXG-Z5eNg45z2iSZucVAnr?usp=sharing>`_ |Colab Badge|
+
Capreolus is organized around the idea of interchangeable and configurable *modules*, such as a neural ``Reranker`` or a first-stage ``Searcher``. Researchers can implement new module classes, such as a new neural ``Reranker``, to experiment with a new module while controlling for all other variables in the pipeline (e.g., the first-stage ranking method and its parameters, the folds used for cross-validation, the tokenization and embeddings (if applicable) used with the reranker, and neural training options like the number of iterations, batch size, and loss function).
Since Capreolus v0.2, *pipelines* are instances of the ``Task`` module and can be combined like any other module.
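
As a rough illustration of the module pattern described above, the sketch below outlines a new ``Reranker``. It mirrors the ``config_spec``/``dependencies``/``build_model`` structure visible in ``capreolus/reranker/parade.py`` later in this changeset; the ``module_name`` attribute, import paths, and dependency names are assumptions based on that pattern rather than the verbatim Capreolus API.

.. code-block:: python

    # Hypothetical sketch of a new Reranker module; only config_spec, dependencies,
    # and build_model follow the structure shown in parade.py -- the rest is assumed.
    from capreolus import ConfigOption, Dependency
    from capreolus.reranker import Reranker


    class MyReranker(Reranker):
        module_name = "myreranker"  # assumed registration attribute
        dependencies = [
            Dependency(key="extractor", module="extractor", name="embedtext"),
            Dependency(key="trainer", module="trainer", name="tensorflow"),
        ]
        config_spec = [
            ConfigOption("hidden", 128, "hidden layer size"),
        ]

        def build_model(self):
            # construct and return the underlying scoring model here
            ...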
@@ -27,3 +29,11 @@ Looking for the code? `Find Capreolus on GitHub. <https://github.com/capreolus-ir/capreolus>`_
+
+.. |Colab Badge| image:: https://colab.research.google.com/assets/colab-badge.svg
+ :alt: Open in Colab
+ :scale: 100%
+ :target: https://colab.research.google.com/drive/161FnmLt3PgIXG-Z5eNg45z2iSZucVAnr?usp=sharing
+
+
+
diff --git a/docs/quick.md b/docs/quick.md
index 7b17d0a2f..6c5c7d672 100644
--- a/docs/quick.md
+++ b/docs/quick.md
@@ -1,5 +1,8 @@
# Getting Started
+Want to jump in? [Get started with a Notebook.](https://colab.research.google.com/drive/161FnmLt3PgIXG-Z5eNg45z2iSZucVAnr?usp=sharing) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/161FnmLt3PgIXG-Z5eNg45z2iSZucVAnr?usp=sharing)
+
+## Prerequisites
- Requirements: Python 3.7+, a Python environment you can install packages in (e.g., a [Conda environment](https://gist.github.com/andrewyates/970c570411c4a36785f6c0e9362eb1eb)), and Java 11. See the [detailed installation instructions](installation) for help with these.
- Install: `pip install capreolus`
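
If you want to double-check these prerequisites before installing, the short (unofficial) snippet below verifies the Python version and that a `java` binary is on your PATH; it does not confirm that the Java version is specifically 11, so check the printed version line yourself.

```python
# Unofficial sanity check for the prerequisites listed above.
import shutil
import subprocess
import sys

assert sys.version_info >= (3, 7), "Capreolus requires Python 3.7+"
java = shutil.which("java")
assert java is not None, "Java 11 is required, but no `java` binary was found on PATH"

# `java -version` prints to stderr; show the first line so the version can be confirmed.
print(subprocess.run([java, "-version"], capture_output=True, text=True).stderr.splitlines()[0])
```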
diff --git a/docs/reproduction/PARADE.md b/docs/reproduction/PARADE.md
index 034904e2d..b4a10974e 100644
--- a/docs/reproduction/PARADE.md
+++ b/docs/reproduction/PARADE.md
@@ -16,15 +16,25 @@ This section contains instructions for installing Capreolus. **Do not** install
3. Briefly read about [configuring Capreolus](https://capreolus.ai/en/latest/installation.html#configuring-capreolus). The main thing to note is that results will be stored in `~/.capreolus` by default.
## Running PARADE (reduced memory usage)
-This section describes how to run PARADE on a GPU with 16GB RAM. This is substantially less than used in the [paper](https://arxiv.org/abs/2008.09093), so we'll reduce the batch size and the size of each passage to make the data to fit.
+This section describes how to run PARADE on a GPU with 16GB of memory. This is substantially less than was used in the [paper](https://arxiv.org/abs/2008.09093), so we'll train on a single fold and reduce several hyperparameters (such as the passage length and the number of training iterations) so that the data and model fit. However, this reduced configuration won't reach the same effectiveness as the full PARADE model (see the instructions below).
1. Make sure you have an available GPU and are in the top-level `capreolus` directory.
2. Train and evaluate PARADE on a single fold: `python -m capreolus.run rerank.traineval with file=docs/reproduction/config_parade_small.txt fold=s1`
3. This command takes about 3.5 hours on a Titan Xp GPU. Once it finishes, metrics on the dev and test sets are shown:
-> 2020-09-01 15:45:10,053 - INFO - capreolus.task.rerank.evaluate - rerank: fold=s1 dev metrics: P_1=0.750 P_10=0.500 P_20=0.443 P_5=0.554 judged_10=0.992 judged_20=0.989 judged_200=0.947 map=0.267 ndcg_cut_10=0.533 ndcg_cut_20=0.513 ndcg_cut_5=0.562 recall_100=0.453 recall_1000=0.453 recip_rank=0.817
+> 2020-10-20 12:39:37,265 - INFO - capreolus.task.rerank.evaluate - rerank: fold=s1 dev metrics: P_1=0.688 P_10=0.529 P_20=0.428 P_5=0.596 judged_10=0.998 judged_20=0.995 judged_200=0.947 map=0.271 ndcg_cut_10=0.545 ndcg_cut_20=0.504 ndcg_cut_5=0.577 recall_100=0.453 recall_1000=0.453 recip_rank=0.787
-> 2020-09-01 15:45:10,095 - INFO - capreolus.task.rerank.evaluate - rerank: fold=s1 test metrics: P_1=0.596 P_10=0.487 P_20=0.419 P_5=0.549 judged_10=0.989 judged_20=0.985 judged_200=0.931 map=0.285 ndcg_cut_10=0.491 ndcg_cut_20=0.486 ndcg_cut_5=0.518 recall_100=0.490 recall_1000=0.490 recip_rank=0.727
+> 2020-10-20 12:39:37,343 - INFO - capreolus.task.rerank.evaluate - rerank: fold=s1 test metrics: P_1=0.532 P_10=0.472 P_20=0.418 P_5=0.528 judged_10=0.989 judged_20=0.989 judged_200=0.931 map=0.285 ndcg_cut_10=0.470 ndcg_cut_20=0.471 ndcg_cut_5=0.485 recall_100=0.490 recall_1000=0.490 recip_rank=0.672
4. Compare your *fold=s1* results to those shown here. Do they match? If so, we can move on to reproducing the full PARADE model.
## Running PARADE (full model with normal memory usage)
-TODO. This requires a 48GB GPU, a TPU, or porting PARADE to Pytorch so we can iterate over passages rather than loading all of them in memory at once (see issue #86). The corresponding config is in `docs/reproduction/config_parade.txt`.
+This requires a 48GB GPU, a TPU, or porting PARADE to PyTorch so that passages can be iterated over rather than loaded into memory all at once (see issue #86). This configuration has been tested on NVIDIA Quadro RTX 8000 GPUs and Google Cloud TPUs.
+
+1. Make sure you have an available GPU and are in the top-level `capreolus` directory.
+2. Train and evaluate PARADE on each of the five robust04 folds (splits *s1-s5*):
+`python -m capreolus.run rerank.traineval with file=docs/reproduction/config_parade.txt fold=s1`
+`python -m capreolus.run rerank.traineval with file=docs/reproduction/config_parade.txt fold=s2`
+`python -m capreolus.run rerank.traineval with file=docs/reproduction/config_parade.txt fold=s3`
+`python -m capreolus.run rerank.traineval with file=docs/reproduction/config_parade.txt fold=s4`
+`python -m capreolus.run rerank.traineval with file=docs/reproduction/config_parade.txt fold=s5`
+3. Each command will take a long time: approximately 36 hours on a Quadro RTX 8000 (much faster on a TPU). As above, per-fold metrics are displayed after each fold completes.
+4. When the final fold completes, cross-validated metrics are also displayed.
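
If you prefer to script the five runs from step 2, an (unofficial) wrapper like the one below simply invokes the same commands in sequence; it adds nothing beyond the CLI calls listed above.

```python
# Unofficial convenience wrapper around the per-fold commands from step 2.
import subprocess

for fold in ["s1", "s2", "s3", "s4", "s5"]:
    subprocess.run(
        [
            "python", "-m", "capreolus.run", "rerank.traineval",
            "with", "file=docs/reproduction/config_parade.txt", f"fold={fold}",
        ],
        check=True,  # stop early if a fold fails
    )
```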
diff --git a/docs/reproduction/config_parade.txt b/docs/reproduction/config_parade.txt
index c2200f8ee..4a65027c4 100644
--- a/docs/reproduction/config_parade.txt
+++ b/docs/reproduction/config_parade.txt
@@ -6,6 +6,7 @@ rank.searcher.name=bm25staticrob04yang19
reranker.name=parade
reranker.aggregation=transformer
+reranker.pretrained=electra-base-msmarco
reranker.extractor.usecache=True
reranker.extractor.maxseqlen=256
diff --git a/docs/reproduction/config_parade_small.txt b/docs/reproduction/config_parade_small.txt
index 4d40d5803..47bd4d56e 100644
--- a/docs/reproduction/config_parade_small.txt
+++ b/docs/reproduction/config_parade_small.txt
@@ -6,15 +6,16 @@ rank.searcher.name=bm25staticrob04yang19
reranker.name=parade
reranker.aggregation=transformer
+reranker.pretrained=electra-base-msmarco
reranker.extractor.usecache=True
-reranker.extractor.maxseqlen=150
+reranker.extractor.maxseqlen=125
reranker.extractor.numpassages=12
-reranker.extractor.passagelen=125
-reranker.extractor.stride=25
+reranker.extractor.passagelen=100
+reranker.extractor.stride=75
reranker.extractor.prob=0.1
-reranker.trainer.niters=36
+reranker.trainer.niters=4
reranker.trainer.itersize=256
reranker.trainer.validatefreq=2
reranker.trainer.batch=2
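
As a quick sanity check on the reduced-memory settings above, the arithmetic below shows how much of each document the passages cover and how much consecutive passages overlap. It assumes a standard sliding-window passage split; that assumption comes from the option names, not from anything stated in this diff.

```python
# Back-of-the-envelope check, assuming a sliding-window split with the settings above.
numpassages, passagelen, stride = 12, 100, 75

coverage = passagelen + (numpassages - 1) * stride  # tokens spanned by all passages
overlap = passagelen - stride                       # overlap between consecutive passages

print(f"~{coverage} document tokens covered, {overlap}-token overlap between passages")
# -> ~925 document tokens covered, 25-token overlap between passages
```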