diff --git a/MANIFEST.in b/MANIFEST.in index fa0df662c..e96c698e9 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,12 +1,16 @@ -include capreolus/utils/cache.capnp -include capreolus/collection/dummy.yaml -include capreolus/collection/dummy_folds.json -include capreolus/collection/qrels.dummy.txt -include capreolus/collection/topics.dummy.txt -include capreolus/collection/dummy/data/dummy_trec_doc -include capreolus/collection/qrels.robust2004.txt -include capreolus/collection/rob04_cedr_folds.json -include capreolus/collection/rob04_yang19_folds.json -include capreolus/collection/rob04_yang19_rm3.run -include capreolus/collection/robust04.yaml -include capreolus/collection/topics.robust04.301-450.601-700.txt +include capreolus/data/antique.json +include capreolus/data/dummy/data/dummy_trec_doc +include capreolus/data/dummy_folds.json +include capreolus/data/dummy.yaml +include capreolus/data/msmarcopassage.folds.json +include capreolus/data/qrels.antique.txt +include capreolus/data/qrels.dummy.txt +include capreolus/data/qrels.msmarcopassage.txt +include capreolus/data/qrels.robust2004.txt +include capreolus/data/rob04_cedr_folds.json +include capreolus/data/rob04_yang19_folds.json +include capreolus/data/rob04_yang19_rm3.run +include capreolus/data/robust04.yaml +include capreolus/data/topics.antique.txt +include capreolus/data/topics.dummy.txt +include capreolus/data/topics.robust04.301-450.601-700.txt diff --git a/capreolus/collection/__init__.py b/capreolus/collection/__init__.py index 50224a73d..ae4f7d416 100644 --- a/capreolus/collection/__init__.py +++ b/capreolus/collection/__init__.py @@ -195,7 +195,7 @@ class ANTIQUE(Collection): generator_type = "DefaultLuceneDocumentGenerator" def download_if_missing(self): - url = "https://ciir.cs.umass.edu/downloads/Antique/antique-collection.txt" + url = "http://ciir.cs.umass.edu/downloads/Antique/antique-collection.txt" cachedir = self.get_cache_path() document_dir = os.path.join(cachedir, "documents") coll_filename = os.path.join(document_dir, "antique-collection.txt") @@ -232,7 +232,10 @@ def _convert_to_trec(self, inp_path, outp_path): def _validate_document_path(self, path): """ Checks that the sha256sum is correct """ - return hash_file(path) == "409e0960f918970977ceab9e5b1d372f45395af25d53b95644bdc9ccbbf973da" + return ( + hash_file(os.path.join(path, "antique-collection.txt")) + == "409e0960f918970977ceab9e5b1d372f45395af25d53b95644bdc9ccbbf973da" + ) @Collection.register diff --git a/requirements.txt b/requirements.txt index 6b03337b4..ac1ffe0d6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -27,6 +27,7 @@ fasteners==0.15 pytest-mock==1.10.4 mock pyserini==0.9.3.0 +numpy scipy sphinx sphinxcontrib-apidoc diff --git a/setup.py b/setup.py index 1e88f0a8b..4cb656a67 100644 --- a/setup.py +++ b/setup.py @@ -61,6 +61,7 @@ def run(self): "pytest-mock==1.10.4", "mock", "pyserini==0.9.3.0", + "numpy", "scipy", "keras", "google-api-python-client",