Skip to content

Commit

Permalink
Remove support for Elasticsearch v5
Browse files Browse the repository at this point in the history
Closes #799.
  • Loading branch information
dadoonet authored and shahariaazam committed Nov 30, 2019
1 parent c12dddf commit de68a7d
Show file tree
Hide file tree
Showing 36 changed files with 7 additions and 2,329 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ You need to install a version matching your Elasticsearch version:

| Elasticsearch | FS Crawler | Released | Docs |
|--------------------|-------------|----------|------------------------------------------------------------------------------|
| 2.x, 5.x, 6.x, 7.x | 2.7-SNAPSHOT| |[2.7-SNAPSHOT](https://fscrawler.readthedocs.io/en/latest/) |
| 6.x, 7.x | 2.7-SNAPSHOT| |[2.7-SNAPSHOT](https://fscrawler.readthedocs.io/en/latest/) |
| 2.x, 5.x, 6.x | 2.6 |2019-01-09|[2.6](https://fscrawler.readthedocs.io/en/fscrawler-2.6) |
| 2.x, 5.x, 6.x | 2.5 |2018-08-04|[2.5](https://fscrawler.readthedocs.io/en/fscrawler-2.5) |
| 2.x, 5.x, 6.x | **2.4** |2017-08-11|[2.4](https://github.com/dadoonet/fscrawler/blob/fscrawler-2.4/README.md) |
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -251,13 +251,7 @@ public static void main(String[] args) throws Exception {
try {
// Let see if we want to upgrade an existing cluster to latest version
if (commands.upgrade) {
logger.info("Upgrading job [{}]", jobName);
boolean success = fsCrawler.upgrade();
if (success) {
// We can rewrite the fscrawler setting file (we now have a elasticsearch.index_folder property)
logger.info("Updating fscrawler setting file");
fsSettingsFileHandler.write(fsSettings);
}
logger.info("Upgrading job [{}]. No rule implemented. Skipping.", jobName);
} else {
try {
fsCrawler.getEsClient().start();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,6 @@

package fr.pilato.elasticsearch.crawler.fs;

import fr.pilato.elasticsearch.crawler.fs.client.ESSearchRequest;
import fr.pilato.elasticsearch.crawler.fs.client.ESSearchResponse;
import fr.pilato.elasticsearch.crawler.fs.client.ElasticsearchClient;
import fr.pilato.elasticsearch.crawler.fs.client.ElasticsearchClientUtil;
import fr.pilato.elasticsearch.crawler.fs.framework.FsCrawlerUtil;
Expand Down Expand Up @@ -86,66 +84,6 @@ public ElasticsearchClient getEsClient() {
return esClient;
}

/**
* Upgrade FSCrawler indices
* @return true if done successfully
* @throws Exception In case of error
*/
@SuppressWarnings("deprecation")
public boolean upgrade() throws Exception {
// We need to start a client so we can send requests to elasticsearch
try {
esClient.start();
} catch (Exception t) {
logger.fatal("We can not start Elasticsearch Client. Exiting.", t);
return false;
}

// The upgrade script is for now a bit dumb. It assumes that you had an old version of FSCrawler (< 2.3) and it will
// simply move data from index/folder to index_folder
String index = settings.getElasticsearch().getIndex();

// Check that the old index actually exists
if (esClient.isExistingIndex(index)) {
// We check that the new indices don't exist yet or are empty
String indexFolder = settings.getElasticsearch().getIndexFolder();
boolean indexExists = esClient.isExistingIndex(indexFolder);
long numberOfDocs = 0;
if (indexExists) {
ESSearchResponse responseFolder = esClient.search(new ESSearchRequest().withIndex(indexFolder));
numberOfDocs = responseFolder.getTotalHits();
}
if (numberOfDocs > 0) {
logger.warn("[{}] already exists and is not empty. No upgrade needed.", indexFolder);
} else {
logger.debug("[{}] can be upgraded.", index);

// Create the new indices with the right mappings (well, we don't read existing user configuration)
if (!indexExists) {
esClient.createIndices();
logger.info("[{}] has been created.", indexFolder);
}

// Run reindex task for folders
logger.info("Starting reindex folders...");
int folders = esClient.reindex(index, INDEX_TYPE_FOLDER, indexFolder);
logger.info("Done reindexing [{}] folders...", folders);

// Run delete by query task for folders
logger.info("Starting removing folders from [{}]...", index);
esClient.deleteByQuery(index, INDEX_TYPE_FOLDER);
logger.info("Done removing folders from [{}]", index);

logger.info("You can now upgrade your elasticsearch cluster to >=6.0.0!");
return true;
}
} else {
logger.info("[{}] does not exist. No upgrade needed.", index);
}

return false;
}

public void start() throws Exception {
logger.info("Starting FS crawler");
if (loop < 0) {
Expand Down
48 changes: 0 additions & 48 deletions distribution/es5/pom.xml

This file was deleted.

This file was deleted.

1 change: 0 additions & 1 deletion distribution/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
<modules>
<module>es7</module>
<module>es6</module>
<module>es5</module>
</modules>

<dependencies>
Expand Down
12 changes: 0 additions & 12 deletions docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,12 +50,10 @@ def read_version(full_version=True):
# The full version, including alpha/beta/rc tags.
release = read_version()

downloadUrlV5 = "https://repo1.maven.org/maven2/fr/pilato/elasticsearch/crawler/fscrawler-es5/%s/fscrawler-es5-%s.zip" % (version, version)
downloadUrlV6 = "https://repo1.maven.org/maven2/fr/pilato/elasticsearch/crawler/fscrawler-es6/%s/fscrawler-es6-%s.zip" % (version, version)
downloadUrlV7 = "https://repo1.maven.org/maven2/fr/pilato/elasticsearch/crawler/fscrawler-es7/%s/fscrawler-es7-%s.zip" % (version, version)

if release.endswith('-SNAPSHOT'):
downloadUrlV5 = "https://oss.sonatype.org/content/repositories/snapshots/fr/pilato/elasticsearch/crawler/fscrawler-es5/%s/" % release
downloadUrlV6 = "https://oss.sonatype.org/content/repositories/snapshots/fr/pilato/elasticsearch/crawler/fscrawler-es6/%s/" % release
downloadUrlV7 = "https://oss.sonatype.org/content/repositories/snapshots/fr/pilato/elasticsearch/crawler/fscrawler-es7/%s/" % release

Expand Down Expand Up @@ -231,50 +229,40 @@ def read_version(full_version=True):
.. |ES| replace:: Elasticsearch
.. |Tika_format| replace:: Tika
.. |Tika_version| replace:: Tika {fmt_tika_version}
.. |ESHL_version5| replace:: Elasticsearch Rest Client {fmt_es_version5}
.. |ESHL_version6| replace:: Elasticsearch Rest Client {fmt_es_version6}
.. |ESHL_version7| replace:: Elasticsearch Rest Client {fmt_es_version7}
.. |Levigo_version| replace:: levigo-jbig2-imageio:{fmt_levigo_version}
.. |Tiff_version| replace:: jai-imageio-core:{fmt_tiff_version}
.. |JPEG2000_version| replace:: jai-imageio-jpeg2000:{fmt_jpeg_version}
.. |Download_URL_V5| replace:: fscrawler-es5-{fmt_release}
.. |Download_URL_V6| replace:: fscrawler-es6-{fmt_release}
.. |Download_URL_V7| replace:: fscrawler-es7-{fmt_release}
.. |Maven_Central_V5| replace:: fscrawler-es5-*
.. |Maven_Central_V6| replace:: fscrawler-es6-*
.. |Maven_Central_V7| replace:: fscrawler-es7-*
.. |Sonatype_V5| replace:: fscrawler-es5-*
.. |Sonatype_V6| replace:: fscrawler-es6-*
.. |Sonatype_V7| replace:: fscrawler-es7-*
.. _Tika: http://tika.apache.org/{fmt_tika_version}/
.. _ES: https://www.elastic.co/products/elasticsearch
.. _Tika_format: http://tika.apache.org/{fmt_tika_version}/formats.html#Supported_Document_Formats
.. _Tika_version: http://tika.apache.org/{fmt_tika_version}/
.. _ESHL_version5: https://www.elastic.co/guide/en/elasticsearch/client/java-rest/current/index.html
.. _ESHL_version6: https://www.elastic.co/guide/en/elasticsearch/client/java-rest/current/index.html
.. _ESHL_version7: https://www.elastic.co/guide/en/elasticsearch/client/java-rest/current/index.html
.. _Levigo_version: http://repo1.maven.org/maven2/com/levigo/jbig2/levigo-jbig2-imageio/{fmt_levigo_version}/
.. _Tiff_version: http://repo1.maven.org/maven2/com/github/jai-imageio/jai-imageio-core/{fmt_tiff_version}/
.. _JPEG2000_version: http://repo1.maven.org/maven2/com/github/jai-imageio/jai-imageio-jpeg2000/{fmt_jpeg_version}/
.. _Download_URL_V5: {fmt_downloadUrl_V5}
.. _Download_URL_V6: {fmt_downloadUrl_V6}
.. _Download_URL_V7: {fmt_downloadUrl_V7}
.. _Maven_Central_V5: https://repo1.maven.org/maven2/fr/pilato/elasticsearch/crawler/fscrawler-es5/
.. _Maven_Central_V6: https://repo1.maven.org/maven2/fr/pilato/elasticsearch/crawler/fscrawler-es6/
.. _Maven_Central_V7: https://repo1.maven.org/maven2/fr/pilato/elasticsearch/crawler/fscrawler-es7/
.. _Sonatype_V5: https://oss.sonatype.org/content/repositories/snapshots/fr/pilato/elasticsearch/crawler/fscrawler-es5/
.. _Sonatype_V6: https://oss.sonatype.org/content/repositories/snapshots/fr/pilato/elasticsearch/crawler/fscrawler-es6/
.. _Sonatype_V7: https://oss.sonatype.org/content/repositories/snapshots/fr/pilato/elasticsearch/crawler/fscrawler-es7/
""".format(
fmt_tika_version=config.get('3rdParty', 'TikaVersion'),
fmt_es_version5=config.get('3rdParty', 'ElasticsearchVersion5'),
fmt_es_version6=config.get('3rdParty', 'ElasticsearchVersion6'),
fmt_es_version7=config.get('3rdParty', 'ElasticsearchVersion7'),
fmt_levigo_version=config.get('3rdParty', 'LevigoVersion'),
fmt_tiff_version=config.get('3rdParty', 'TiffVersion'),
fmt_jpeg_version=config.get('3rdParty', 'JpegVersion'),
fmt_downloadUrl_V5=downloadUrlV5,
fmt_downloadUrl_V6=downloadUrlV6,
fmt_downloadUrl_V7=downloadUrlV7,
fmt_release=release
Expand Down
4 changes: 1 addition & 3 deletions docs/source/dev/build.rst
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,6 @@ But you need first to specify the Maven profile to use and rebuild the project.

* ``es-7x`` for Elasticsearch 7.x
* ``es-6x`` for Elasticsearch 6.x
* ``es-5x`` for Elasticsearch 5.x


Run tests with an external cluster
Expand All @@ -62,9 +61,8 @@ To run the test suite against an elasticsearch instance running locally, just ru

.. tip::

If you want to run against a version 5 or 6, run::
If you want to run against a version 6, run::

mvn verify -pl fr.pilato.elasticsearch.crawler:fscrawler-it-v5 -Dtests.cluster.url=http://localhost:9200
mvn verify -pl fr.pilato.elasticsearch.crawler:fscrawler-it-v6 -Dtests.cluster.url=http://localhost:9200

.. hint::
Expand Down
1 change: 0 additions & 1 deletion docs/source/fscrawler.ini
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ Version=2.7-SNAPSHOT

[3rdParty]
TikaVersion=1.22
ElasticsearchVersion5=5.6.15
ElasticsearchVersion6=6.8.5
ElasticsearchVersion7=7.4.2
LevigoVersion=2.0
Expand Down
1 change: 0 additions & 1 deletion docs/source/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ This crawler helps to index binary documents such as PDF, Open Office, MS Office

* |ESHL_version7|_ for Elasticsearch V7.
* |ESHL_version6|_ for Elasticsearch V6.
* |ESHL_version5|_ for Elasticsearch V5.

.. toctree::
:caption: Installation Guide
Expand Down
6 changes: 1 addition & 5 deletions docs/source/installation.rst
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ Download FSCrawler

* |Download_URL_V7|_ for Elasticsearch V7.
* |Download_URL_V6|_ for Elasticsearch V6.
* |Download_URL_V5|_ for Elasticsearch V5.

The filename ends with ``.zip``.

Expand All @@ -19,7 +18,6 @@ Download FSCrawler

* |Maven_Central_V7|_ for Elasticsearch V7.
* |Maven_Central_V6|_ for Elasticsearch V6.
* |Maven_Central_V5|_ for Elasticsearch V5.

.. ifconfig:: release == version

Expand All @@ -28,7 +26,6 @@ Download FSCrawler

* |Download_URL_V7|_ for Elasticsearch V7.
* |Download_URL_V6|_ for Elasticsearch V6.
* |Download_URL_V5|_ for Elasticsearch V5.

.. tip::

Expand All @@ -37,13 +34,11 @@ Download FSCrawler

* |Maven_Central_V7|_ for Elasticsearch V7.
* |Maven_Central_V6|_ for Elasticsearch V6.
* |Maven_Central_V5|_ for Elasticsearch V5.

You can also download a **SNAPSHOT** version from Sonatype:

* |Sonatype_V7|_ for Elasticsearch V7.
* |Sonatype_V6|_ for Elasticsearch V6.
* |Sonatype_V5|_ for Elasticsearch V5.

The distribution contains:

Expand Down Expand Up @@ -344,4 +339,5 @@ and replace all elasticsearch/lucene jars to the 6.6 version.
- FSCrawler does not follow symbolic links anymore. You need to explicitly set ``fs.follow_symlink``
to ``true`` if you wish to revert to the previous behavior.
- The mapping for Elasticsearch 6.x can no longer contain the type name.
- We removed Elasticsearch V5 compatibility, as that version is no longer maintained by Elastic.

1 change: 0 additions & 1 deletion docs/src/main/resources/fscrawler.ini
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ Version=${project.version}

[3rdParty]
TikaVersion=${tika.version}
ElasticsearchVersion5=${elasticsearch5.version}
ElasticsearchVersion6=${elasticsearch6.version}
ElasticsearchVersion7=${elasticsearch7.version}
LevigoVersion=${levigo.version}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -90,24 +90,6 @@ public interface ElasticsearchClient extends Closeable {
*/
void waitForHealthyIndex(String index) throws IOException;

/**
* Reindex data from one index/type to another index
* @param sourceIndex source index name
* @param sourceType source type name
* @param targetIndex target index name
* @return The number of documents that have been reindexed
* @throws IOException In case of error
*/
int reindex(String sourceIndex, String sourceType, String targetIndex) throws IOException;

/**
* Fully removes a type from an index (removes data)
* @param index index name
* @param type type
* @throws IOException In case of error
*/
void deleteByQuery(String index, String type) throws IOException;

// Utility methods

boolean isIngestSupported();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,16 +63,6 @@ public void waitForHealthyIndex(String index) {
// Testing purpose only
}

@Override
public int reindex(String sourceIndex, String sourceType, String targetIndex) {
return 0;
}

@Override
public void deleteByQuery(String index, String type) {
// Testing purpose only
}

@Override
public boolean isIngestSupported() {
return false;
Expand Down
Loading

0 comments on commit de68a7d

Please sign in to comment.