/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.elasticsearch.index.analysis;

import java.io.File;
import java.io.Reader;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.List;
import java.util.Map;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.synonym.SolrSynonymParser;
import org.apache.lucene.analysis.synonym.SynonymMap;
import org.apache.lucene.analysis.synonym.WordnetSynonymParser;
import org.elasticsearch.ElasticsearchIllegalArgumentException;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.io.FastStringReader;
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.settings.IndexSettings;
import org.elasticsearch.indices.analysis.IndicesAnalysisService;

@AnalysisSettingsRequired
public class HeatSynonymTokenFilterFactory extends AbstractTokenFilterFactory {

    private SynonymMap synonymMap;
    private boolean ignoreCase;
    // Last-modified timestamp of the synonyms file; lets init() skip the
    // rebuild when the file has not changed.
    private long lastModified;
    // Note: only the most recently created filter instance is refreshed by
    // the background reloader.
    private HeatSynonymFilter mySynonymFilter;

    // Single-threaded scheduler that periodically re-reads the synonyms file.
    private static ScheduledExecutorService pool = Executors.newScheduledThreadPool(1);

    private void initMonitor(final Index index, final Settings indexSettings,
            final Environment env,
            final IndicesAnalysisService indicesAnalysisService,
            final Map<String, TokenizerFactoryFactory> tokenizerFactories,
            final String name, final Settings settings) {
        pool.scheduleAtFixedRate(new Runnable() {
            @Override
            public void run() {
                try {
                    if (mySynonymFilter != null) {
                        System.out.println("init SynonymTokenFilterFactory................");
                        boolean flag = init(index, indexSettings, env,
                                indicesAnalysisService, tokenizerFactories,
                                name, settings);
                        if (flag) {
                            // The file changed: push the rebuilt synonym map
                            // into the live filter.
                            mySynonymFilter.init(synonymMap, ignoreCase);
                        } else {
                            System.out.println("synonyms file not modified, skipping reload");
                        }
                    } else {
                        System.out.println("mySynonymFilter is null.................");
                    }
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
        }, 10, 60, TimeUnit.SECONDS);
    }

    @Inject
    public HeatSynonymTokenFilterFactory(Index index,
            @IndexSettings Settings indexSettings, Environment env,
            IndicesAnalysisService indicesAnalysisService,
            Map<String, TokenizerFactoryFactory> tokenizerFactories,
            @Assisted String name, @Assisted Settings settings) {
        super(index, indexSettings, name, settings);
        System.out.println("SynonymTokenFilterFactory init <<<<<<<<<<<<<<<<<<<<<");
        boolean flag = init(index, indexSettings, env, indicesAnalysisService,
                tokenizerFactories, name, settings);
        System.out.println("SynonymTokenFilterFactory init >>>>>>>>>>>>>>>>>>>>>>" + flag);
        if (flag) {
            // Only start the background reloader when the initial load succeeded.
            initMonitor(index, indexSettings, env, indicesAnalysisService,
                    tokenizerFactories, name, settings);
        }
    }

    /**
     * Builds (or rebuilds) the synonym map. Returns false when the synonyms
     * file exists but is unchanged since the last build, so the caller can
     * skip the reload.
     */
    public boolean init(Index index, Settings indexSettings, Environment env,
            IndicesAnalysisService indicesAnalysisService,
            Map<String, TokenizerFactoryFactory> tokenizerFactories,
            String name, Settings settings) {
        Reader rulesReader = null;
        if (settings.getAsArray("synonyms", null) != null) {
            List<String> rules = Analysis.getWordList(env, settings, "synonyms");
            StringBuilder sb = new StringBuilder();
            for (String line : rules) {
                sb.append(line).append(System.getProperty("line.separator"));
            }
            rulesReader = new FastStringReader(sb.toString());
        } else if (settings.get("synonyms_path") != null) {
            String filePath = settings.get("synonyms_path");
            System.out.println("synonyms_path :" + filePath);
            URL fileUrl = env.resolveConfig(filePath);
            File file = null;
            try {
                file = new File(fileUrl.toURI());
            } catch (URISyntaxException e) {
                e.printStackTrace();
            }
            if (file != null && file.exists()) {
                if (lastModified != file.lastModified()) {
                    lastModified = file.lastModified();
                } else {
                    // File unchanged since the last build: nothing to do.
                    return false;
                }
            }
            rulesReader = Analysis.getReaderFromFile(env, settings, "synonyms_path");
        } else {
            throw new ElasticsearchIllegalArgumentException(
                    "synonym requires either `synonyms` or `synonyms_path` to be configured");
        }

        this.ignoreCase = settings.getAsBoolean("ignore_case", false);
        boolean expand = settings.getAsBoolean("expand", true);

        String tokenizerName = settings.get("tokenizer", "whitespace");
        TokenizerFactoryFactory tokenizerFactoryFactory = tokenizerFactories.get(tokenizerName);
        if (tokenizerFactoryFactory == null) {
            tokenizerFactoryFactory = indicesAnalysisService.tokenizerFactoryFactory(tokenizerName);
        }
        if (tokenizerFactoryFactory == null) {
            throw new ElasticsearchIllegalArgumentException(
                    "failed to find tokenizer [" + tokenizerName + "] for synonym token filter");
        }
        final TokenizerFactory tokenizerFactory = tokenizerFactoryFactory.create(tokenizerName, settings);

        // Analyzer used only to tokenize the synonym rules themselves.
        Analyzer analyzer = new Analyzer() {
            @Override
            protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
                Tokenizer tokenizer = tokenizerFactory == null
                        ? new WhitespaceTokenizer(Lucene.ANALYZER_VERSION, reader)
                        : tokenizerFactory.create(reader);
                TokenStream stream = ignoreCase
                        ? new LowerCaseFilter(Lucene.ANALYZER_VERSION, tokenizer)
                        : tokenizer;
                return new TokenStreamComponents(tokenizer, stream);
            }
        };

        try {
            SynonymMap.Builder parser;
            if ("wordnet".equalsIgnoreCase(settings.get("format"))) {
                parser = new WordnetSynonymParser(true, expand, analyzer);
                ((WordnetSynonymParser) parser).parse(rulesReader);
            } else {
                parser = new SolrSynonymParser(true, expand, analyzer);
                ((SolrSynonymParser) parser).parse(rulesReader);
            }
            synonymMap = parser.build();
            System.out.println("synonymMap.words.size==" + synonymMap.words.size());
        } catch (Exception e) {
            throw new ElasticsearchIllegalArgumentException("failed to build synonyms", e);
        }
        return true;
    }

    @Override
    @SuppressWarnings("resource")
    public TokenStream create(TokenStream tokenStream) {
        System.out.println("create.............SynonymTokenFilterFactory");
        mySynonymFilter = new HeatSynonymFilter(tokenStream, synonymMap, ignoreCase);
        // fst is null means no synonyms
        return synonymMap.fst == null ? tokenStream : mySynonymFilter;
    }
}
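The factory only takes effect once it is registered with the analysis module. The source above does not include that wiring, so here is a minimal registration sketch in the Elasticsearch 1.x plugin style the imports suggest; the class name HeatSynonymPlugin, the plugin name, and the filter key "heat_synonym" are assumptions, not part of the original code:

package org.elasticsearch.index.analysis;

import org.elasticsearch.plugins.AbstractPlugin;

// Hypothetical plugin wrapper registering HeatSynonymTokenFilterFactory.
public class HeatSynonymPlugin extends AbstractPlugin {

    @Override
    public String name() {
        return "analysis-heat-synonym"; // hypothetical plugin name
    }

    @Override
    public String description() {
        return "synonym token filter with hot reload of the synonyms file";
    }

    // Called by Elasticsearch when modules are assembled; binds the factory
    // under the filter name used in index settings.
    public void onModule(AnalysisModule module) {
        module.addProcessor(new AnalysisModule.AnalysisBinderProcessor() {
            @Override
            public void processTokenFilters(TokenFiltersBindings tokenFiltersBindings) {
                tokenFiltersBindings.processTokenFilter("heat_synonym",
                        HeatSynonymTokenFilterFactory.class);
            }
        });
    }
}

With a registration along these lines, an index could then declare a filter such as {"type": "heat_synonym", "synonyms_path": "analysis/synonyms.txt"} in its analysis settings, and the factory's background task would pick up edits to that file without a restart.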
package org.elasticsearch.index.analysis;

import java.io.IOException;

import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.synonym.SynonymMap;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.lucene.util.fst.FST;

public class HeatSynonymFilter extends TokenFilter {

    public static final String TYPE_SYNONYM = "SYNONYM";

    private SynonymMap synonyms;
    private boolean ignoreCase;
    private int rollBufferSize;
    private int captureCount;

    // TODO: we should set PositionLengthAttr too...

    private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
    private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
    private final PositionLengthAttribute posLenAtt = addAttribute(PositionLengthAttribute.class);
    private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
    private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);

    // How many future input tokens have already been matched
    // to a synonym; because the matching is "greedy" we don't
    // try to do any more matching for such tokens:
    private int inputSkipCount;

    // Holds all buffered (read ahead) stacked input tokens for
    // a future position. When multiple tokens are at the
    // same position, we only store (and match against) the
    // term for the first token at the position, but capture
    // state for (and enumerate) all other tokens at this
    // position:
    private static class PendingInput {
        final CharsRef term = new CharsRef();
        AttributeSource.State state;
        boolean keepOrig;
        boolean matched;
        boolean consumed = true;
        int startOffset;
        int endOffset;

        public void reset() {
            state = null;
            consumed = true;
            keepOrig = false;
            matched = false;
        }
    }

    // Rolling buffer, holding pending input tokens we had to
    // clone because we needed to look ahead, indexed by
    // position:
    private PendingInput[] futureInputs;

    // Holds pending output synonyms for one future position:
    private static class PendingOutputs {
        CharsRef[] outputs;
        int[] endOffsets;
        int[] posLengths;
        int upto;
        int count;
        int posIncr = 1;
        int lastEndOffset;
        int lastPosLength;

        public PendingOutputs() {
            outputs = new CharsRef[1];
            endOffsets = new int[1];
            posLengths = new int[1];
        }

        public void reset() {
            upto = count = 0;
            posIncr = 1;
        }

        public CharsRef pullNext() {
            assert upto < count;
            lastEndOffset = endOffsets[upto];
            lastPosLength = posLengths[upto];
            final CharsRef result = outputs[upto++];
            posIncr = 0;
            if (upto == count) {
                reset();
            }
            return result;
        }

        public int getLastEndOffset() {
            return lastEndOffset;
        }

        public int getLastPosLength() {
            return lastPosLength;
        }

        public void add(char[] output, int offset, int len, int endOffset, int posLength) {
            if (count == outputs.length) {
                final CharsRef[] next = new CharsRef[ArrayUtil.oversize(
                        1 + count, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
                System.arraycopy(outputs, 0, next, 0, count);
                outputs = next;
            }
            if (count == endOffsets.length) {
                final int[] next = new int[ArrayUtil.oversize(1 + count,
                        RamUsageEstimator.NUM_BYTES_INT)];
                System.arraycopy(endOffsets, 0, next, 0, count);
                endOffsets = next;
            }
            if (count == posLengths.length) {
                final int[] next = new int[ArrayUtil.oversize(1 + count,
                        RamUsageEstimator.NUM_BYTES_INT)];
                System.arraycopy(posLengths, 0, next, 0, count);
                posLengths = next;
            }
            if (outputs[count] == null) {
                outputs[count] = new CharsRef();
            }
            outputs[count].copyChars(output, offset, len);
            // endOffset can be -1, in which case we should simply
            // use the endOffset of the input token, or X >= 0, in
            // which case we use X as the endOffset for this output
            endOffsets[count] = endOffset;
            posLengths[count] = posLength;
            count++;
        }
    }

    private final ByteArrayDataInput bytesReader = new ByteArrayDataInput();

    // Rolling buffer, holding stack of pending synonym
    // outputs, indexed by position:
    private PendingOutputs[] futureOutputs;

    // Where (in rolling buffers) to write next input saved state:
    private int nextWrite;

    // Where (in rolling buffers) to read next input saved state:
    private int nextRead;

    // True once we've read last token
    private boolean finished;

    private FST.Arc<BytesRef> scratchArc;
    private FST<BytesRef> fst;
    private FST.BytesReader fstReader;

    private BytesRef scratchBytes = new BytesRef();
    private CharsRef scratchChars = new CharsRef();

    /**
     * @param input
     *            input tokenstream
     * @param synonyms
     *            synonym map
     * @param ignoreCase
     *            case-folds input for matching with
     *            {@link Character#toLowerCase(int)}. Note, if you set this to
     *            true, it's your responsibility to lowercase the input entries
     *            when you create the {@link SynonymMap}
     */
    public HeatSynonymFilter(TokenStream input, SynonymMap synonyms, boolean ignoreCase) {
        super(input);
        init(synonyms, ignoreCase);
    }

    /**
     * (Re)initializes this filter with a new synonym map. The factory calls
     * this when the synonyms file changes, which is what makes hot reloading
     * possible.
     */
    public void init(SynonymMap synonyms, boolean ignoreCase) {
        this.synonyms = synonyms;
        this.ignoreCase = ignoreCase;
        this.fst = synonyms.fst;
        // Check for null before dereferencing the FST (the original code
        // called fst.getBytesReader() before this check).
        if (fst == null) {
            throw new IllegalArgumentException("fst must be non-null");
        }
        this.fstReader = fst.getBytesReader();

        // Must be 1+ so that when roll buffer is at full
        // lookahead we can distinguish this full buffer from
        // the empty buffer:
        rollBufferSize = 1 + synonyms.maxHorizontalContext;

        futureInputs = new PendingInput[rollBufferSize];
        futureOutputs = new PendingOutputs[rollBufferSize];
        for (int pos = 0; pos < rollBufferSize; pos++) {
            futureInputs[pos] = new PendingInput();
            futureOutputs[pos] = new PendingOutputs();
        }

        scratchArc = new FST.Arc<BytesRef>();
    }

    private void capture() {
        captureCount++;
        final PendingInput input = futureInputs[nextWrite];

        input.state = captureState();
        input.consumed = false;
        input.term.copyChars(termAtt.buffer(), 0, termAtt.length());

        nextWrite = rollIncr(nextWrite);

        // Buffer head should never catch up to tail:
        assert nextWrite != nextRead;
    }

    /*
     * This is the core of this TokenFilter: it locates the synonym matches and
     * buffers up the results into futureInputs/Outputs.
     *
     * NOTE: this calls input.incrementToken and does not capture the state if
     * no further tokens were checked. So caller must then forward state to our
     * caller, or capture:
     */
    private int lastStartOffset;
    private int lastEndOffset;

    private void parse() throws IOException {
        assert inputSkipCount == 0;

        int curNextRead = nextRead;

        // Holds the longest match we've seen so far:
        BytesRef matchOutput = null;
        int matchInputLength = 0;
        int matchEndOffset = -1;

        BytesRef pendingOutput = fst.outputs.getNoOutput();
        fst.getFirstArc(scratchArc);

        assert scratchArc.output == fst.outputs.getNoOutput();

        int tokenCount = 0;

        byToken: while (true) {
            // Pull next token's chars:
            final char[] buffer;
            final int bufferLen;

            int inputEndOffset = 0;

            if (curNextRead == nextWrite) {
                // We used up our lookahead buffer of input tokens
                // -- pull next real input token:
                if (finished) {
                    break;
                } else {
                    assert futureInputs[nextWrite].consumed;
                    // Not correct: a syn match whose output is longer
                    // than its input can set future inputs keepOrig
                    // to true:
                    // assert !futureInputs[nextWrite].keepOrig;
                    if (input.incrementToken()) {
                        buffer = termAtt.buffer();
                        bufferLen = termAtt.length();
                        final PendingInput input = futureInputs[nextWrite];
                        lastStartOffset = input.startOffset = offsetAtt.startOffset();
                        lastEndOffset = input.endOffset = offsetAtt.endOffset();
                        inputEndOffset = input.endOffset;
                        if (nextRead != nextWrite) {
                            capture();
                        } else {
                            input.consumed = false;
                        }
                    } else {
                        // No more input tokens
                        finished = true;
                        break;
                    }
                }
            } else {
                // Still in our lookahead
                buffer = futureInputs[curNextRead].term.chars;
                bufferLen = futureInputs[curNextRead].term.length;
                inputEndOffset = futureInputs[curNextRead].endOffset;
            }

            tokenCount++;

            // Run each char in this token through the FST:
            int bufUpto = 0;
            while (bufUpto < bufferLen) {
                final int codePoint = Character.codePointAt(buffer, bufUpto, bufferLen);
                if (fst.findTargetArc(
                        ignoreCase ? Character.toLowerCase(codePoint) : codePoint,
                        scratchArc, scratchArc, fstReader) == null) {
                    break byToken;
                }

                // Accum the output
                pendingOutput = fst.outputs.add(pendingOutput, scratchArc.output);
                bufUpto += Character.charCount(codePoint);
            }

            // OK, entire token matched; now see if this is a final
            // state:
            if (scratchArc.isFinal()) {
                matchOutput = fst.outputs.add(pendingOutput, scratchArc.nextFinalOutput);
                matchInputLength = tokenCount;
                matchEndOffset = inputEndOffset;
            }

            // See if the FST wants to continue matching (ie, needs to
            // see the next input token):
            if (fst.findTargetArc(SynonymMap.WORD_SEPARATOR, scratchArc,
                    scratchArc, fstReader) == null) {
                // No further rules can match here; we're done
                // searching for matching rules starting at the
                // current input position.
                break;
            } else {
                // More matching is possible -- accum the output (if
                // any) of the WORD_SEP arc:
                pendingOutput = fst.outputs.add(pendingOutput, scratchArc.output);
                if (nextRead == nextWrite) {
                    capture();
                }
            }

            curNextRead = rollIncr(curNextRead);
        }

        if (nextRead == nextWrite && !finished) {
            nextWrite = rollIncr(nextWrite);
        }

        if (matchOutput != null) {
            inputSkipCount = matchInputLength;
            addOutput(matchOutput, matchInputLength, matchEndOffset);
        } else if (nextRead != nextWrite) {
            // Even though we had no match here, we set to 1
            // because we need to skip current input token before
            // trying to match again:
            inputSkipCount = 1;
        } else {
            assert finished;
        }
    }

    // Interleaves all output tokens onto the futureOutputs:
    private void addOutput(BytesRef bytes, int matchInputLength, int matchEndOffset) {
        bytesReader.reset(bytes.bytes, bytes.offset, bytes.length);

        final int code = bytesReader.readVInt();
        final boolean keepOrig = (code & 0x1) == 0;
        final int count = code >>> 1;
        for (int outputIDX = 0; outputIDX < count; outputIDX++) {
            synonyms.words.get(bytesReader.readVInt(), scratchBytes);
            UnicodeUtil.UTF8toUTF16(scratchBytes, scratchChars);
            int lastStart = scratchChars.offset;
            final int chEnd = lastStart + scratchChars.length;
            int outputUpto = nextRead;
            for (int chIDX = lastStart; chIDX <= chEnd; chIDX++) {
                if (chIDX == chEnd || scratchChars.chars[chIDX] == SynonymMap.WORD_SEPARATOR) {
                    final int outputLen = chIDX - lastStart;
                    // Caller is not allowed to have empty string in
                    // the output:
                    assert outputLen > 0 : "output contains empty string: " + scratchChars;
                    final int endOffset;
                    final int posLen;
                    if (chIDX == chEnd && lastStart == scratchChars.offset) {
                        // This rule had a single output token, so, we set
                        // this output's endOffset to the current
                        // endOffset (ie, endOffset of the last input
                        // token it matched):
                        endOffset = matchEndOffset;
                        posLen = keepOrig ? matchInputLength : 1;
                    } else {
                        // This rule has more than one output token; we
                        // can't pick any particular endOffset for this
                        // case, so, we inherit the endOffset for the
                        // input token which this output overlaps:
                        endOffset = -1;
                        posLen = 1;
                    }
                    futureOutputs[outputUpto].add(scratchChars.chars, lastStart,
                            outputLen, endOffset, posLen);
                    lastStart = 1 + chIDX;
                    outputUpto = rollIncr(outputUpto);
                    assert futureOutputs[outputUpto].posIncr == 1 : "outputUpto="
                            + outputUpto + " vs nextWrite=" + nextWrite;
                }
            }
        }

        int upto = nextRead;
        for (int idx = 0; idx < matchInputLength; idx++) {
            futureInputs[upto].keepOrig |= keepOrig;
            futureInputs[upto].matched = true;
            upto = rollIncr(upto);
        }
    }

    // ++ mod rollBufferSize
    private int rollIncr(int count) {
        count++;
        if (count == rollBufferSize) {
            return 0;
        } else {
            return count;
        }
    }

    // for testing
    int getCaptureCount() {
        return captureCount;
    }

    @Override
    public boolean incrementToken() throws IOException {
        while (true) {
            // First play back any buffered future inputs/outputs
            // w/o running parsing again:
            while (inputSkipCount != 0) {
                // At each position, we first output the original
                // token
                // TODO: maybe just a PendingState class, holding
                // both input & outputs?
                final PendingInput input = futureInputs[nextRead];
                final PendingOutputs outputs = futureOutputs[nextRead];
                if (!input.consumed && (input.keepOrig || !input.matched)) {
                    if (input.state != null) {
                        // Return a previously saved token (because we
                        // had to lookahead):
                        restoreState(input.state);
                    } else {
                        // Pass-through case: return token we just pulled
                        // but didn't capture:
                        assert inputSkipCount == 1 : "inputSkipCount="
                                + inputSkipCount + " nextRead=" + nextRead;
                    }
                    input.reset();
                    if (outputs.count > 0) {
                        outputs.posIncr = 0;
                    } else {
                        nextRead = rollIncr(nextRead);
                        inputSkipCount--;
                    }
                    return true;
                } else if (outputs.upto < outputs.count) {
                    // Still have pending outputs to replay at this
                    // position
                    input.reset();
                    final int posIncr = outputs.posIncr;
                    final CharsRef output = outputs.pullNext();
                    clearAttributes();
                    termAtt.copyBuffer(output.chars, output.offset, output.length);
                    typeAtt.setType(TYPE_SYNONYM);
                    int endOffset = outputs.getLastEndOffset();
                    if (endOffset == -1) {
                        endOffset = input.endOffset;
                    }
                    offsetAtt.setOffset(input.startOffset, endOffset);
                    posIncrAtt.setPositionIncrement(posIncr);
                    posLenAtt.setPositionLength(outputs.getLastPosLength());
                    if (outputs.count == 0) {
                        // Done with the buffered input and all outputs at
                        // this position
                        nextRead = rollIncr(nextRead);
                        inputSkipCount--;
                    }
                    return true;
                } else {
                    // Done with the buffered input and all outputs at
                    // this position
                    input.reset();
                    nextRead = rollIncr(nextRead);
                    inputSkipCount--;
                }
            }

            if (finished && nextRead == nextWrite) {
                // End case: if any output syns went beyond end of
                // input stream, enumerate them now:
                final PendingOutputs outputs = futureOutputs[nextRead];
                if (outputs.upto < outputs.count) {
                    final int posIncr = outputs.posIncr;
                    final CharsRef output = outputs.pullNext();
                    futureInputs[nextRead].reset();
                    if (outputs.count == 0) {
                        nextWrite = nextRead = rollIncr(nextRead);
                    }
                    clearAttributes();
                    // Keep offset from last input token:
                    offsetAtt.setOffset(lastStartOffset, lastEndOffset);
                    termAtt.copyBuffer(output.chars, output.offset, output.length);
                    typeAtt.setType(TYPE_SYNONYM);
                    posIncrAtt.setPositionIncrement(posIncr);
                    return true;
                } else {
                    return false;
                }
            }

            // Find new synonym matches:
            parse();
        }
    }

    @Override
    public void reset() throws IOException {
        super.reset();
        captureCount = 0;
        finished = false;
        inputSkipCount = 0;
        nextRead = nextWrite = 0;

        // In normal usage these resets would not be needed,
        // since they reset-as-they-are-consumed, but the app
        // may not consume all input tokens (or we might hit an
        // exception), in which case we have leftover state
        // here:
        for (PendingInput input : futureInputs) {
            input.reset();
        }
        for (PendingOutputs output : futureOutputs) {
            output.reset();
        }
    }
}
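Because HeatSynonymFilter is a plain Lucene TokenFilter, the stacking behavior can be exercised outside Elasticsearch. A minimal sketch, assuming a Lucene 4.x classpath; the demo class, the rule string "laptop, notebook", and the Version.LUCENE_47 constant are illustrative, not from the original source:

package org.elasticsearch.index.analysis;

import java.io.StringReader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.synonym.SolrSynonymParser;
import org.apache.lucene.analysis.synonym.SynonymMap;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.Version;

public class HeatSynonymFilterDemo {
    public static void main(String[] args) throws Exception {
        // Build a synonym map from Solr-format rules (dedup=true, expand=true).
        Analyzer analyzer = new WhitespaceAnalyzer(Version.LUCENE_47);
        SolrSynonymParser parser = new SolrSynonymParser(true, true, analyzer);
        parser.parse(new StringReader("laptop, notebook"));
        SynonymMap map = parser.build();

        // Tokenize some input and stack synonyms onto the matching position.
        WhitespaceTokenizer tokenizer = new WhitespaceTokenizer(
                Version.LUCENE_47, new StringReader("cheap laptop deals"));
        TokenStream stream = new HeatSynonymFilter(tokenizer, map, true);
        CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);

        stream.reset();
        while (stream.incrementToken()) {
            // Expected terms: cheap, laptop, notebook, deals
            System.out.println(term.toString());
        }
        stream.end();
        stream.close();
    }
}

Hot reloading would then amount to parsing a new rules file into a fresh SynonymMap and handing it to the existing filter via init(map, ignoreCase), exactly as the factory's scheduled task does.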