");
+ }
+
+    /** Renders the term buffer, offsets, positions, type and child as {@code TokenBody{name=value, ...}}. */
+    @Override
+    public String toString() {
+        return "TokenBody{termBuffer='" + termBuffer + '\''
+                + ", startOffset=" + startOffset
+                + ", endOffset=" + endOffset
+                + ", position=" + position
+                + ", startPosition=" + startPosition
+                + ", endPosition=" + endPosition
+                + ", type='" + type + '\''
+                + ", child=" + child
+                + '}';
+    }
+}
diff --git a/core/src/main/java/org/wltea/analyzer/fcp/tokenattributes/PositionLengthAttribute.java b/core/src/main/java/org/wltea/analyzer/fcp/tokenattributes/PositionLengthAttribute.java
new file mode 100644
index 0000000..2e0f632
--- /dev/null
+++ b/core/src/main/java/org/wltea/analyzer/fcp/tokenattributes/PositionLengthAttribute.java
@@ -0,0 +1,34 @@
+package org.wltea.analyzer.fcp.tokenattributes;
+
+import org.apache.lucene.util.Attribute;
+
+/**
+ * Records how many positions a token spans.
+ *
+ * <p>Few analyzer components produce this attribute and indexing ignores it,
+ * but it naturally expresses the graph structure created by decompounding,
+ * word splitting/joining, synonym filtering and similar steps.
+ *
+ * <p>NOTE: the attribute is optional; most analyzers keep the default value (1).
+ */
+@Deprecated
+public interface PositionLengthAttribute extends Attribute {
+    /**
+     * Sets the position length of this token. The default value is one.
+     *
+     * @param positionLength how many positions this token spans
+     * @throws IllegalArgumentException if {@code positionLength} is zero or
+     *     negative
+     * @see #getPositionLength()
+     */
+    void setPositionLength(int positionLength);
+
+    /**
+     * Returns the position length of this token.
+     *
+     * @return how many positions this token spans
+     * @see #setPositionLength(int)
+     */
+    int getPositionLength();
+}
+
diff --git a/core/src/main/java/org/wltea/analyzer/fcp/tokenattributes/PositionLengthAttributeImpl.java b/core/src/main/java/org/wltea/analyzer/fcp/tokenattributes/PositionLengthAttributeImpl.java
new file mode 100644
index 0000000..c4d5dff
--- /dev/null
+++ b/core/src/main/java/org/wltea/analyzer/fcp/tokenattributes/PositionLengthAttributeImpl.java
@@ -0,0 +1,63 @@
+package org.wltea.analyzer.fcp.tokenattributes;
+
+
+import org.apache.lucene.util.AttributeImpl;
+import org.apache.lucene.util.AttributeReflector;
+
+/** Default implementation of {@link PositionLengthAttribute}; the length starts at 1. */
+@Deprecated
+public class PositionLengthAttributeImpl extends AttributeImpl implements PositionLengthAttribute, Cloneable {
+    private int positionLength = 1;
+
+    /** Creates the attribute with a position length of 1. */
+    public PositionLengthAttributeImpl() {}
+
+    /** Stores the position length; values below 1 are rejected with {@link IllegalArgumentException}. */
+    @Override
+    public void setPositionLength(int positionLength) {
+        if (positionLength < 1) {
+            throw new IllegalArgumentException("Position length must be 1 or greater; got " + positionLength);
+        }
+        this.positionLength = positionLength;
+    }
+
+    @Override
+    public int getPositionLength() {
+        return this.positionLength;
+    }
+
+    /** Resets the position length to 1. */
+    @Override
+    public void clear() {
+        positionLength = 1;
+    }
+
+    /** Two instances are equal when they hold the same position length. */
+    @Override
+    public boolean equals(Object other) {
+        if (this == other) {
+            return true;
+        }
+        return other instanceof PositionLengthAttributeImpl
+                && ((PositionLengthAttributeImpl) other).positionLength == positionLength;
+    }
+
+    /** Uses the position length itself as the hash, consistent with {@link #equals(Object)}. */
+    @Override
+    public int hashCode() {
+        return positionLength;
+    }
+
+    /** Copies the position length into {@code target}, which is cast to {@link PositionLengthAttribute}. */
+    @Override
+    public void copyTo(AttributeImpl target) {
+        ((PositionLengthAttribute) target).setPositionLength(positionLength);
+    }
+
+    /** Reports the position length to {@code reflector} under the key {@code "positionLength"}. */
+    @Override
+    public void reflectWith(AttributeReflector reflector) {
+        reflector.reflect(PositionLengthAttribute.class, "positionLength", positionLength);
+    }
+}
+
diff --git a/core/src/main/java/org/wltea/analyzer/fcp/util/CharacterUtil.java b/core/src/main/java/org/wltea/analyzer/fcp/util/CharacterUtil.java
new file mode 100644
index 0000000..0f61896
--- /dev/null
+++ b/core/src/main/java/org/wltea/analyzer/fcp/util/CharacterUtil.java
@@ -0,0 +1,124 @@
+/**
+ * IK 中文分词 版本 5.0
+ * IK Analyzer release 5.0
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * 源代码由林良益(linliangyi2005@gmail.com)提供
+ * 版权声明 2012,乌龙茶工作室
+ * provided by Linliangyi and copyright 2012 by Oolong studio
+ *
+ * 字符集识别工具类
+ */
+package org.wltea.analyzer.fcp.util;
+
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Character-set identification utilities.
+ *
+ * <p>NOTE(review): in this patch every type constant read {@code ""} and the
+ * rank map had no type arguments, which looks like {@code <...>} text having
+ * been stripped. The {@code <NAME>} values below are reconstructed from the
+ * constant names; confirm them against the upstream file.
+ */
+public class CharacterUtil {
+    public static final String CHAR_USELESS = "<USELESS>";
+    public static final String CHAR_ENGLISH = "<ENGLISH>";
+    public static final String CHAR_NUMBER = "<NUMBER>";
+    public static final String CHAR_NUMBER_DOT = "<NUMBER_DOT>";
+    public static final String ALPHANUM = "<ALPHANUM>";
+    public static final String CHAR_CHINESE = "<CHINESE>";
+    public static final String COMBINE_WORD = "<COMBINE_WORD>";
+    public static final String CHAR_MAPPING = "<MAPPING>";
+    public static final String CHAR_BLANK = "<BLANK>";
+    public static final String CHAR_PINYIN = "<PINYIN>"; // pinyin
+    public static final String CHAR_PINYIN_PRE = "<PINYIN_PRE>"; // pinyin prefix
+
+    /** Sort rank per token type; a smaller rank sorts earlier among types sharing one position. */
+    private static final Map<String, Integer> ORDER = new HashMap<>();
+    static {
+        ORDER.put(CHAR_CHINESE, 0);
+        ORDER.put(CHAR_PINYIN_PRE, 5);
+        ORDER.put(CHAR_PINYIN, 10);
+        ORDER.put(CHAR_USELESS, 0);
+        ORDER.put(CHAR_MAPPING, 10);
+    }
+
+    /**
+     * Returns the sort rank registered for a token type.
+     *
+     * @param type token type, normally one of the constants of this class
+     * @return the registered rank, or 0 when the type has none
+     */
+    public static int getOrderByType(String type) {
+        return ORDER.getOrDefault(type, 0);
+    }
+
+    /**
+     * Classifies a code point.
+     *
+     * @param input code point to classify
+     * @return {@link #CHAR_NUMBER} for ASCII digits, {@link #CHAR_ENGLISH} for
+     *     ASCII letters, {@link #CHAR_CHINESE} for the CJK Unified Ideographs,
+     *     CJK Compatibility Ideographs and CJK Unified Ideographs Extension A
+     *     blocks, and {@link #CHAR_USELESS} for everything else, including
+     *     values that are not valid code points
+     */
+    public static String identifyCharType(int input) {
+        if (input >= '0' && input <= '9') {
+            return CHAR_NUMBER;
+        }
+        if ((input >= 'a' && input <= 'z') || (input >= 'A' && input <= 'Z')) {
+            return CHAR_ENGLISH;
+        }
+        // UnicodeBlock.of(int) throws IllegalArgumentException outside the code point range.
+        if (!Character.isValidCodePoint(input)) {
+            return CHAR_USELESS;
+        }
+        Character.UnicodeBlock block = Character.UnicodeBlock.of(input);
+        if (block == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS
+                || block == Character.UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS
+                || block == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A) {
+            return CHAR_CHINESE;
+        }
+        // Any other character is left unprocessed.
+        return CHAR_USELESS;
+    }
+
+    /**
+     * Normalizes a code point: full-width forms become half-width and
+     * upper-case ASCII letters become lower-case.
+     *
+     * @param input code point to normalize
+     * @return the normalized code point
+     */
+    public static int regularize(int input) {
+        if (input == 12288) {
+            return 32; // U+3000 ideographic space -> ASCII space
+        }
+        if (input > 65280 && input < 65375) {
+            // U+FF01..U+FF5E full-width forms -> U+0021..U+007E
+            input -= 65248;
+        }
+        // Checked after width folding so full-width capitals are lower-cased too.
+        if (input >= 'A' && input <= 'Z') {
+            input += 32;
+        }
+        return input;
+    }
+}
diff --git a/core/src/test/java/org/wltea/analyzer/fcp/Configuration4Test.java b/core/src/test/java/org/wltea/analyzer/fcp/Configuration4Test.java
new file mode 100644
index 0000000..34c6d8e
--- /dev/null
+++ b/core/src/test/java/org/wltea/analyzer/fcp/Configuration4Test.java
@@ -0,0 +1,29 @@
+package org.wltea.analyzer.fcp;
+
+import org.wltea.analyzer.cfg.Configuration;
+
+import java.io.File;
+import java.net.URI;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+
+/** {@link Configuration} for tests; both configuration directories resolve to {@code ../config}. */
+public class Configuration4Test extends Configuration {
+    private static final Path CONFIG_DIR = Paths.get("../", "config");
+
+    @Override
+    public Path getConfDir() {
+        return CONFIG_DIR;
+    }
+
+    @Override
+    public Path getConfigInPluginDir() {
+        return CONFIG_DIR;
+    }
+
+    /** Resolves the path directly with {@link Paths#get(String, String...)}. */
+    @Override
+    public Path getPath(String first, String... more) {
+        return Paths.get(first, more);
+    }
+}
diff --git a/core/src/test/java/org/wltea/analyzer/fcp/FCPAnalyzerTest.java b/core/src/test/java/org/wltea/analyzer/fcp/FCPAnalyzerTest.java
new file mode 100644
index 0000000..b65507d
--- /dev/null
+++ b/core/src/test/java/org/wltea/analyzer/fcp/FCPAnalyzerTest.java
@@ -0,0 +1,80 @@
+package org.wltea.analyzer.fcp;
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
+import org.junit.Before;
+import org.junit.Test;
+import org.wltea.analyzer.dic.Dictionary;
+
+import java.io.IOException;
+import java.io.StringReader;
+
+/**
+ * Smoke tests that print the tokens {@link FCPAnalyzer} emits for a mixed
+ * Chinese/English sample in index mode and in search mode.
+ */
+public class FCPAnalyzerTest {
+
+    /** Input shared by both analyzer tests. */
+    private static final String SAMPLE = "这里是中国, this is china #4.345^";
+
+    /** Initializes the dictionary from the test configuration before each test. */
+    @Before
+    public void init() {
+        Dictionary.initial(new Configuration4Test());
+    }
+
+    /** Prints the tokens produced by {@code new FCPAnalyzer(true)}. */
+    @Test
+    public void testFcpIndexAnalyzer() throws IOException {
+        printTokens(new FCPAnalyzer(true), SAMPLE);
+    }
+
+    /** Prints the tokens produced by {@code new FCPAnalyzer(false)}. */
+    @Test
+    public void testFcpSearchAnalyzer() throws IOException {
+        printTokens(new FCPAnalyzer(false), SAMPLE);
+    }
+
+    /** Prints whether a whitespace-only string is empty after {@link String#trim()}. */
+    @Test
+    public void test03() {
+        String s = " \t \n";
+        System.out.println(s.trim().length() == 0);
+    }
+
+    /**
+     * Runs {@code text} through {@code analyzer} and prints one line per token
+     * in the form {@code position:[term]:start->end:type}.
+     *
+     * <p>The stream is opened in try-with-resources so it is closed even when
+     * tokenization fails, and {@link IOException} propagates so a failure
+     * fails the test instead of only being printed.
+     *
+     * @param analyzer analyzer under test
+     * @param text input to tokenize
+     * @throws IOException if the token stream fails
+     */
+    private static void printTokens(FCPAnalyzer analyzer, String text) throws IOException {
+        try (TokenStream stream = analyzer.tokenStream("any", new StringReader(text))) {
+            PositionIncrementAttribute pia = stream.addAttribute(PositionIncrementAttribute.class); // position increment
+            OffsetAttribute oa = stream.addAttribute(OffsetAttribute.class); // start/end offsets
+            CharTermAttribute cta = stream.addAttribute(CharTermAttribute.class); // term text
+            TypeAttribute ta = stream.addAttribute(TypeAttribute.class); // token type
+            // The TokenStream workflow requires reset() before the first incrementToken().
+            stream.reset();
+            int position = -1;
+            while (stream.incrementToken()) {
+                // Accumulate increments into an absolute position.
+                position += pia.getPositionIncrement();
+                System.out.println(position + ":[" + cta.toString() + "]:"
+                        + oa.startOffset() + "->" + oa.endOffset()
+                        + ":" + ta.type());
+            }
+            stream.end();
+        }
+    }
+}
diff --git a/elasticsearch/src/main/java/com/infinilabs/ik/elasticsearch/AnalysisIkPlugin.java b/elasticsearch/src/main/java/com/infinilabs/ik/elasticsearch/AnalysisIkPlugin.java
index 54ee735..f906af6 100644
--- a/elasticsearch/src/main/java/com/infinilabs/ik/elasticsearch/AnalysisIkPlugin.java
+++ b/elasticsearch/src/main/java/com/infinilabs/ik/elasticsearch/AnalysisIkPlugin.java
@@ -33,6 +33,12 @@ public Map {
+ private final FCPAnalyzer analyzer;
+
+    /**
+     * Creates the provider and configures its {@link FCPAnalyzer} from {@code settings}.
+     *
+     * <p>Reads {@code split_complete}, {@code max_token_length}, {@code useless_mapping},
+     * {@code ignore_blank}, {@code use_first_position} and {@code show_offset}. Every option
+     * except {@code show_offset} is read with a default constant; {@code show_offset} is
+     * applied only when the value read is non-null.
+     *
+     * @param indexSettings index settings; not read by this constructor
+     * @param env environment; not read by this constructor
+     * @param name analyzer name, passed to the superclass constructor
+     * @param settings analyzer settings, passed to the superclass constructor and read for the options above
+     * @param indexMode mode flag passed to the {@link FCPAnalyzer} constructor
+     */
+    public FCPAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings, boolean indexMode) {
+        super(name, settings);
+        boolean completeSplit = settings.getAsBoolean("split_complete", FCPAnalyzer.DEFAULT_SPLIT_COMPLETE);
+        int tokenLengthLimit = settings.getAsInt("max_token_length", CombineCharFilter.DEFAULT_MAX_WORD_LEN);
+        boolean mapUseless = settings.getAsBoolean("useless_mapping", ExtendFilter.DEFAULT_USELESS_MAPPING);
+        boolean skipBlank = settings.getAsBoolean("ignore_blank", ExtendFilter.DEFAULT_IGNORE_BLANK);
+        boolean firstPosition = settings.getAsBoolean("use_first_position", ExtendFilter.DEFAULT_USE_FIRST_POSITION);
+        Boolean offsetOverride = settings.getAsBoolean("show_offset", null);
+
+        FCPAnalyzer configured = new FCPAnalyzer(indexMode);
+        if (offsetOverride != null) {
+            configured.setShowOffset(offsetOverride);
+        }
+        configured.setSplitComplete(completeSplit);
+        configured.setUselessMapping(mapUseless);
+        configured.setMaxTokenLength(tokenLengthLimit);
+        configured.setIgnoreBlank(skipBlank);
+        configured.setUseFirstPos(firstPosition);
+        this.analyzer = configured;
+    }
+
+    /** Creates a provider whose mode is read from the {@code index_mode} setting, defaulting to {@code ExtendFilter.DEFAULT_INDEX_MODE}. */
+    public static FCPAnalyzerProvider getFCPAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
+        return new FCPAnalyzerProvider(indexSettings, env, name, settings,
+                settings.getAsBoolean("index_mode", ExtendFilter.DEFAULT_INDEX_MODE));
+    }
+
+    /**
+     * Creates a provider with {@code indexMode = true}, then sets {@code useFirstPos} to
+     * {@code true} on its analyzer, replacing the value read from {@code use_first_position}.
+     */
+    public static FCPAnalyzerProvider getFCPIndexAnalyzer(IndexSettings indexSettings, Environment env, String name, Settings settings) {
+        FCPAnalyzerProvider indexProvider = new FCPAnalyzerProvider(indexSettings, env, name, settings, true);
+        indexProvider.get().setUseFirstPos(true);
+        return indexProvider;
+    }
+
+    /**
+     * Creates a provider with {@code indexMode = false}, then sets {@code useFirstPos} to
+     * {@code true} on its analyzer, replacing the value read from {@code use_first_position}.
+     */
+    public static FCPAnalyzerProvider getFCPSearchAnalyzer(IndexSettings indexSettings, Environment env, String name, Settings settings) {
+        FCPAnalyzerProvider searchProvider = new FCPAnalyzerProvider(indexSettings, env, name, settings, false);
+        searchProvider.get().setUseFirstPos(true);
+        return searchProvider;
+    }
+
+    /**
+     * Creates a provider with {@code indexMode = true}, then sets {@code useFirstPos} to
+     * {@code false} on its analyzer, replacing the value read from {@code use_first_position}.
+     */
+    public static FCPAnalyzerProvider getLCPIndexAnalyzer(IndexSettings indexSettings, Environment env, String name, Settings settings) {
+        FCPAnalyzerProvider indexProvider = new FCPAnalyzerProvider(indexSettings, env, name, settings, true);
+        indexProvider.get().setUseFirstPos(false);
+        return indexProvider;
+    }
+
+    /**
+     * Creates a provider with {@code indexMode = false}, then sets {@code useFirstPos} to
+     * {@code false} on its analyzer, replacing the value read from {@code use_first_position}.
+     */
+    public static FCPAnalyzerProvider getLCPSearchAnalyzer(IndexSettings indexSettings, Environment env, String name, Settings settings) {
+        FCPAnalyzerProvider searchProvider = new FCPAnalyzerProvider(indexSettings, env, name, settings, false);
+        searchProvider.get().setUseFirstPos(false);
+        return searchProvider;
+    }
+
+    /** Returns the analyzer instance assigned by the constructor. */
+    @Override
+    public FCPAnalyzer get() {
+        return this.analyzer;
+    }
+}
\ No newline at end of file