Skip to content

Commit

Permalink
Merge pull request #236 from jorgelbg/NUTCH-2399
Browse files Browse the repository at this point in the history
NUTCH-2399 Add support for multivalue fields on indexer-elastic
  • Loading branch information
jorgelbg authored Dec 6, 2017
2 parents 708cc56 + 106a215 commit f483e52
Showing 1 changed file with 9 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import java.io.IOException;
import java.net.InetAddress;
import java.util.HashMap;
+ import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;

Expand All @@ -32,6 +33,7 @@
import org.apache.hadoop.mapred.JobConf;
import org.apache.nutch.indexer.IndexWriter;
import org.apache.nutch.indexer.NutchDocument;
+ import org.apache.nutch.indexer.NutchField;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.bulk.BulkRequest;
import org.elasticsearch.action.bulk.BackoffPolicy;
Expand Down Expand Up @@ -174,9 +176,13 @@ public void write(NutchDocument doc) throws IOException {

// Add each field of this doc to the index source
Map<String, Object> source = new HashMap<String, Object>();
- for (String fieldName : doc.getFieldNames()) {
- if (doc.getFieldValue(fieldName) != null) {
- source.put(fieldName, doc.getFieldValue(fieldName));
+ for (final Map.Entry<String, NutchField> e : doc) {
+ final List<Object> values = e.getValue().getValues();
+
+ if (values.size() > 1) {
+ source.put(e.getKey(), values);
+ } else {
+ source.put(e.getKey(), values.get(0));
}
}

Expand Down

0 comments on commit f483e52

Please sign in to comment.