Lucene 查询条件(子句)数量不能大于1024
package org.apache.lucene.search;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Set;

import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.ToStringUtils;

/** A Query that matches documents matching boolean combinations of other
  * queries, e.g. {@link TermQuery}s, {@link PhraseQuery}s or other
  * BooleanQuerys.
  *
  * <p>The clause limit is held in a plain static field, so it is shared by
  * every BooleanQuery instance in the class loader.</p>
  */
public class BooleanQuery extends Query implements Iterable<BooleanClause> {

  // NOTE(review): this copy initialises the limit to 10240; the Javadoc it was
  // shipped with said 1024. Confirm the raised default is intentional.
  private static int maxClauseCount = 10240;

  /** Thrown when an attempt is made to add more than {@link
   * #getMaxClauseCount()} clauses. This typically happens if
   * a PrefixQuery, FuzzyQuery, WildcardQuery, or TermRangeQuery
   * is expanded to many terms during search.
   */
  public static class TooManyClauses extends RuntimeException {
    /** Builds the exception with a message that reports the current limit. */
    public TooManyClauses() {
      super("maxClauseCount is set to " + maxClauseCount);
    }
  }

  /** Return the maximum number of clauses permitted, 10240 by default in
   * this copy of the class.
   * Attempts to add more than the permitted number of clauses cause {@link
   * TooManyClauses} to be thrown.
   * @return the current clause limit
   * @see #setMaxClauseCount(int)
   */
  public static int getMaxClauseCount() {
    return maxClauseCount;
  }

  /**
   * Set the maximum number of clauses permitted per BooleanQuery.
   * Default value is 10240 (the initial value of the static field).
   *
   * @param maxClauseCount the new limit; must be at least 1
   * @throws IllegalArgumentException if {@code maxClauseCount} is less than 1
   */
  public static void setMaxClauseCount(int maxClauseCount) {
    if (maxClauseCount < 1) {
      throw new IllegalArgumentException("maxClauseCount must be >= 1");
    }
    BooleanQuery.maxClauseCount = maxClauseCount;
  }

  private ArrayList<BooleanClause> clauses = new ArrayList<BooleanClause>();
  private final boolean disableCoord;

  /** Constructs an empty boolean query. */
  public BooleanQuery() {
    disableCoord = false;
  }

  /** Constructs an empty boolean query.
   *
   * {@link Similarity#coord(int,int)} may be disabled in scoring, as
   * appropriate. For example, this score factor does not make sense for most
   * automatically generated queries, like {@link WildcardQuery} and {@link
   * FuzzyQuery}.
   *
   * @param disableCoord disables {@link Similarity#coord(int,int)} in scoring.
   */
  public BooleanQuery(boolean disableCoord) {
    this.disableCoord = disableCoord;
  }

  /** Returns true iff {@link Similarity#coord(int,int)} is disabled in
   * scoring for this query instance.
   * @see #BooleanQuery(boolean)
   */
  public boolean isCoordDisabled() {
    return disableCoord;
  }

  /**
   * Specifies a minimum number of the optional BooleanClauses
   * which must be satisfied.
   *
   * <p>
   * By default no optional clauses are necessary for a match
   * (unless there are no required clauses).  If this method is used,
   * then the specified number of clauses is required.
   * </p>
   * <p>
   * Use of this method is totally independent of specifying that
   * any specific clauses are required (or prohibited).  This number will
   * only be compared against the number of matching optional clauses.
   * </p>
   *
   * @param min the number of optional clauses that must match
   */
  public void setMinimumNumberShouldMatch(int min) {
    this.minNrShouldMatch = min;
  }

  protected int minNrShouldMatch = 0;

  /**
   * Gets the minimum number of the optional BooleanClauses
   * which must be satisfied.
   */
  public int getMinimumNumberShouldMatch() {
    return minNrShouldMatch;
  }

  /** Adds a clause to a boolean query.
   *
   * @param query the sub-query to wrap in a new {@link BooleanClause}
   * @param occur how the sub-query participates in matching
   * @throws TooManyClauses if the new number of clauses exceeds the maximum clause number
   * @see #getMaxClauseCount()
   */
  public void add(Query query, BooleanClause.Occur occur) {
    add(new BooleanClause(query, occur));
  }

  /** Adds a clause to a boolean query.
   * @param clause the clause to append
   * @throws TooManyClauses if the new number of clauses exceeds the maximum clause number
   * @see #getMaxClauseCount()
   */
  public void add(BooleanClause clause) {
    // The check runs before the append, so the list never grows past the limit.
    if (clauses.size() >= maxClauseCount) {
      throw new TooManyClauses();
    }
    clauses.add(clause);
  }

  /** Returns the set of clauses in this query, copied into a new array. */
  public BooleanClause[] getClauses() {
    return clauses.toArray(new BooleanClause[clauses.size()]);
  }

  /** Returns the list of clauses in this query. This is the internal list
   * itself, not a copy. */
  public List<BooleanClause> clauses() {
    return clauses;
  }

  /** Returns an iterator on the clauses in this query. It implements the {@link Iterable} interface to
   * make it possible to do:
   * <pre class="prettyprint">for (BooleanClause clause : booleanQuery) {}</pre>
   */
  @Override
  public final Iterator<BooleanClause> iterator() {
    return clauses().iterator();
  }

  /**
   * Expert: the Weight for BooleanQuery, used to
   * normalize, score and explain these queries.
   *
   * <p>NOTE: this API and implementation is subject to
   * change suddenly in the next release.</p>
   */
  protected class BooleanWeight extends Weight {
    /** The Similarity implementation. */
    protected Similarity similarity;
    /** One sub-weight per clause, in the same order as the clause list. */
    protected ArrayList<Weight> weights;
    protected int maxCoord;  // num optional + num required
    private final boolean disableCoord;

    /**
     * Creates a weight for every clause of the enclosing query and counts the
     * non-prohibited clauses into {@link #maxCoord}.
     *
     * @param searcher supplies the similarity and creates the sub-weights
     * @param disableCoord whether the coord factor is skipped when scoring
     * @throws IOException if creating a sub-weight fails
     */
    public BooleanWeight(IndexSearcher searcher, boolean disableCoord)
      throws IOException {
      this.similarity = searcher.getSimilarity();
      this.disableCoord = disableCoord;
      weights = new ArrayList<Weight>(clauses.size());
      for (int i = 0 ; i < clauses.size(); i++) {
        BooleanClause c = clauses.get(i);
        Weight w = c.getQuery().createWeight(searcher);
        weights.add(w);
        if (!c.isProhibited()) {
          maxCoord++;
        }
      }
    }

    /** Returns the enclosing BooleanQuery. */
    @Override
    public Query getQuery() {
      return BooleanQuery.this;
    }

    /**
     * Sums the normalization values of the non-prohibited sub-weights and
     * multiplies the sum by the square of this query's boost.
     */
    @Override
    public float getValueForNormalization() throws IOException {
      float sum = 0.0f;
      for (int i = 0 ; i < weights.size(); i++) {
        // call getValueForNormalization for all clauses in case of side effects
        float s = weights.get(i).getValueForNormalization();         // sum sub weights
        if (!clauses.get(i).isProhibited()) {
          // only add to sum for non-prohibited clauses
          sum += s;
        }
      }

      sum *= getBoost() * getBoost();             // boost each sub-weight

      return sum ;
    }

    /**
     * Returns the coord factor for {@code overlap} matching clauses out of
     * {@code maxOverlap}; always 1 when {@code maxOverlap} is 1.
     */
    public float coord(int overlap, int maxOverlap) {
      // LUCENE-4300: in most cases of maxOverlap=1, BQ rewrites itself away,
      // so coord() is not applied. But when BQ cannot optimize itself away
      // for a single clause (minNrShouldMatch, prohibited clauses, etc), its
      // important not to apply coord(1,1) for consistency, it might not be 1.0F
      return maxOverlap == 1 ? 1F : similarity.coord(overlap, maxOverlap);
    }

    /** Folds this query's boost into {@code topLevelBoost} and normalizes every sub-weight. */
    @Override
    public void normalize(float norm, float topLevelBoost) {
      topLevelBoost *= getBoost();                         // incorporate boost
      for (Weight w : weights) {
        // normalize all clauses, (even if prohibited in case of side effects)
        w.normalize(norm, topLevelBoost);
      }
    }

    /**
     * Explains the score of {@code doc}: the sum of the matching non-prohibited
     * clause explanations, wrapped in a product with the coord factor when that
     * factor is not 1. The result is marked as a non-match when a required
     * clause fails, a prohibited clause matches, or too few optional clauses match.
     */
    @Override
    public Explanation explain(AtomicReaderContext context, int doc)
      throws IOException {
      final int minShouldMatch =
        BooleanQuery.this.getMinimumNumberShouldMatch();
      ComplexExplanation sumExpl = new ComplexExplanation();
      sumExpl.setDescription("sum of:");
      int coord = 0;
      float sum = 0.0f;
      boolean fail = false;
      int shouldMatchCount = 0;
      Iterator<BooleanClause> cIter = clauses.iterator();
      for (Iterator<Weight> wIter = weights.iterator(); wIter.hasNext();) {
        Weight w = wIter.next();
        BooleanClause c = cIter.next();
        // A null scorer means the clause cannot match anything in this segment.
        if (w.scorer(context, true, true, context.reader().getLiveDocs()) == null) {
          if (c.isRequired()) {
            fail = true;
            Explanation r = new Explanation(0.0f, "no match on required clause (" + c.getQuery().toString() + ")");
            sumExpl.addDetail(r);
          }
          continue;
        }
        Explanation e = w.explain(context, doc);
        if (e.isMatch()) {
          if (!c.isProhibited()) {
            sumExpl.addDetail(e);
            sum += e.getValue();
            coord++;
          } else {
            Explanation r =
              new Explanation(0.0f, "match on prohibited clause (" + c.getQuery().toString() + ")");
            r.addDetail(e);
            sumExpl.addDetail(r);
            fail = true;
          }
          if (c.getOccur() == Occur.SHOULD) {
            shouldMatchCount++;
          }
        } else if (c.isRequired()) {
          Explanation r = new Explanation(0.0f, "no match on required clause (" + c.getQuery().toString() + ")");
          r.addDetail(e);
          sumExpl.addDetail(r);
          fail = true;
        }
      }
      if (fail) {
        sumExpl.setMatch(Boolean.FALSE);
        sumExpl.setValue(0.0f);
        sumExpl.setDescription
          ("Failure to meet condition(s) of required/prohibited clause(s)");
        return sumExpl;
      } else if (shouldMatchCount < minShouldMatch) {
        sumExpl.setMatch(Boolean.FALSE);
        sumExpl.setValue(0.0f);
        sumExpl.setDescription("Failure to match minimum number "+
                               "of optional clauses: " + minShouldMatch);
        return sumExpl;
      }

      sumExpl.setMatch(0 < coord ? Boolean.TRUE : Boolean.FALSE);
      sumExpl.setValue(sum);

      final float coordFactor = disableCoord ? 1.0f : coord(coord, maxCoord);
      if (coordFactor == 1.0f) {
        return sumExpl;                             // eliminate wrapper
      } else {
        ComplexExplanation result = new ComplexExplanation(sumExpl.isMatch(),
                                                           sum*coordFactor,
                                                           "product of:");
        result.addDetail(sumExpl);
        result.addDetail(new Explanation(coordFactor,
                                         "coord("+coord+"/"+maxCoord+")"));
        return result;
      }
    }

    /**
     * Builds the scorer for one segment by sorting the sub-scorers into
     * required, prohibited and optional groups and picking an implementation
     * for that combination.
     *
     * @return the scorer, or {@code null} when a required clause has no scorer,
     *         when there are neither required nor optional scorers, or when
     *         fewer optional scorers exist than {@code minNrShouldMatch}
     */
    @Override
    public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder,
        boolean topScorer, Bits acceptDocs)
        throws IOException {
      List<Scorer> required = new ArrayList<Scorer>();
      List<Scorer> prohibited = new ArrayList<Scorer>();
      List<Scorer> optional = new ArrayList<Scorer>();
      Iterator<BooleanClause> cIter = clauses.iterator();
      for (Weight w : weights) {
        BooleanClause c = cIter.next();
        Scorer subScorer = w.scorer(context, true, false, acceptDocs);
        if (subScorer == null) {
          if (c.isRequired()) {
            return null;
          }
        } else if (c.isRequired()) {
          required.add(subScorer);
        } else if (c.isProhibited()) {
          prohibited.add(subScorer);
        } else {
          optional.add(subScorer);
        }
      }

      // NOTE: we could also use BooleanScorer, if we knew
      // this BooleanQuery was embedded in another
      // BooleanQuery that was also using BooleanScorer (ie,
      // BooleanScorer can nest).  But this is hard to
      // detect and we never do so today... (ie, we only
      // return BooleanScorer for topScorer):

      // Check if we can and should return a BooleanScorer
      // TODO: (LUCENE-4872) in some cases BooleanScorer may be faster for minNrShouldMatch
      // but the same is even true of pure conjunctions...
      if (!scoreDocsInOrder && topScorer && required.size() == 0 && minNrShouldMatch <= 1) {
        return new BooleanScorer(this, disableCoord, minNrShouldMatch, optional, prohibited, maxCoord);
      }

      if (required.size() == 0 && optional.size() == 0) {
        // no required and optional clauses.
        return null;
      } else if (optional.size() < minNrShouldMatch) {
        // either >1 req scorer, or there are 0 req scorers and at least 1
        // optional scorer. Therefore if there are not enough optional scorers
        // no documents will be matched by the query
        return null;
      }

      // simple conjunction
      if (optional.size() == 0 && prohibited.size() == 0) {
        float coord = disableCoord ? 1.0f : coord(required.size(), maxCoord);
        return new ConjunctionScorer(this, required.toArray(new Scorer[required.size()]), coord);
      }

      // simple disjunction
      if (required.size() == 0 && prohibited.size() == 0 && minNrShouldMatch <= 1 && optional.size() > 1) {
        // coord[i] is the factor applied when i optional clauses match
        float coord[] = new float[optional.size()+1];
        for (int i = 0; i < coord.length; i++) {
          coord[i] = disableCoord ? 1.0f : coord(i, maxCoord);
        }
        return new DisjunctionSumScorer(this, optional.toArray(new Scorer[optional.size()]), coord);
      }

      // Return a BooleanScorer2
      return new BooleanScorer2(this, disableCoord, minNrShouldMatch, required, prohibited, optional, maxCoord);
    }

    /** Returns false when any clause is required, true otherwise. */
    @Override
    public boolean scoresDocsOutOfOrder() {
      for (BooleanClause c : clauses) {
        if (c.isRequired()) {
          return false; // BS2 (in-order) will be used by scorer()
        }
      }

      // scorer() will return an out-of-order scorer if requested.
      return true;
    }

  }

  /** Creates a {@link BooleanWeight} for this query using its coord setting. */
  @Override
  public Weight createWeight(IndexSearcher searcher) throws IOException {
    return new BooleanWeight(searcher, disableCoord);
  }

  /**
   * Rewrites this query. A query with exactly one non-prohibited clause and no
   * minimum-should-match is replaced by that clause's rewritten query (with the
   * boosts multiplied); otherwise each clause is rewritten and a clone is
   * returned only if at least one clause changed.
   */
  @Override
  public Query rewrite(IndexReader reader) throws IOException {
    if (minNrShouldMatch == 0 && clauses.size() == 1) {                    // optimize 1-clause queries
      BooleanClause c = clauses.get(0);
      if (!c.isProhibited()) {  // just return clause

        Query query = c.getQuery().rewrite(reader);    // rewrite first

        if (getBoost() != 1.0f) {                 // incorporate boost
          if (query == c.getQuery()) {                   // if rewrite was no-op
            query = query.clone();         // then clone before boost
          }
          // Since the BooleanQuery only has 1 clause, the BooleanQuery will be
          // written out. Therefore the rewritten Query's boost must incorporate both
          // the clause's boost, and the boost of the BooleanQuery itself
          query.setBoost(getBoost() * query.getBoost());
        }

        return query;
      }
    }

    BooleanQuery clone = null;                    // recursively rewrite
    for (int i = 0 ; i < clauses.size(); i++) {
      BooleanClause c = clauses.get(i);
      Query query = c.getQuery().rewrite(reader);
      if (query != c.getQuery()) {                     // clause rewrote: must clone
        if (clone == null) {
          // The BooleanQuery clone is lazily initialized so only initialize
          // it if a rewritten clause differs from the original clause (and hasn't been
          // initialized already).  If nothing differs, the clone isn't needlessly created
          clone = this.clone();
        }
        clone.clauses.set(i, new BooleanClause(query, c.getOccur()));
      }
    }
    if (clone != null) {
      return clone;                               // some clauses rewrote
    } else {
      return this;                                // no clauses rewrote
    }
  }

  /** Collects the terms of every clause whose occur is not MUST_NOT. */
  // inherit javadoc
  @Override
  public void extractTerms(Set<Term> terms) {
    for (BooleanClause clause : clauses) {
      if (clause.getOccur() != Occur.MUST_NOT) {
        clause.getQuery().extractTerms(terms);
      }
    }
  }

  /** Returns a copy whose clause list is a separate (shallow) copy of this one. */
  @Override @SuppressWarnings("unchecked")
  public BooleanQuery clone() {
    BooleanQuery clone = (BooleanQuery)super.clone();
    clone.clauses = (ArrayList<BooleanClause>) this.clauses.clone();
    return clone;
  }

  /** Prints a user-readable version of this query. */
  @Override
  public String toString(String field) {
    StringBuilder buffer = new StringBuilder();
    // Parentheses are only needed when a boost or "~min" suffix follows.
    boolean needParens= getBoost() != 1.0 || getMinimumNumberShouldMatch() > 0;
    if (needParens) {
      buffer.append("(");
    }

    for (int i = 0 ; i < clauses.size(); i++) {
      BooleanClause c = clauses.get(i);
      if (c.isProhibited()) {
        buffer.append("-");
      } else if (c.isRequired()) {
        buffer.append("+");
      }

      Query subQuery = c.getQuery();
      if (subQuery != null) {
        if (subQuery instanceof BooleanQuery) {  // wrap sub-bools in parens
          buffer.append("(");
          buffer.append(subQuery.toString(field));
          buffer.append(")");
        } else {
          buffer.append(subQuery.toString(field));
        }
      } else {
        buffer.append("null");
      }

      if (i != clauses.size()-1) {
        buffer.append(" ");
      }
    }

    if (needParens) {
      buffer.append(")");
    }

    if (getMinimumNumberShouldMatch()>0) {
      buffer.append('~');
      buffer.append(getMinimumNumberShouldMatch());
    }

    if (getBoost() != 1.0f) {
      buffer.append(ToStringUtils.boost(getBoost()));
    }

    return buffer.toString();
  }

  /** Returns true iff <code>o</code> is equal to this. */
  @Override
  public boolean equals(Object o) {
    if (!(o instanceof BooleanQuery)) {
      return false;
    }
    BooleanQuery other = (BooleanQuery)o;
    return this.getBoost() == other.getBoost()
        && this.clauses.equals(other.clauses)
        && this.getMinimumNumberShouldMatch() == other.getMinimumNumberShouldMatch()
        && this.disableCoord == other.disableCoord;
  }

  /** Returns a hash code value for this object.*/
  @Override
  public int hashCode() {
    // '+' binds tighter than '^': the boost bits are XORed with the sum of the rest.
    return Float.floatToIntBits(getBoost()) ^ clauses.hashCode()
      + getMinimumNumberShouldMatch() + (disableCoord ? 17:0);
  }

}
下面是在网上看到的一个解决方法:
可以通过设置:
BooleanQuery.setMaxClauseCount(10000);
来解决问题。但是这样做会使内存开销加大,容易出现 OutOfMemory 异常,所以需要非常谨慎处理。
Lucene在做大量term值查询时,如果term值过多,超过1024个,会出现
TooManyClauses[maxClauseCount is set to 1024] 的异常,因此建议在term过多的情况下采用filter,而不是query。
以下是该情形在ES中的测试。
- Settings defaultSettings = ImmutableSettings.settingsBuilder().put("client.transport.sniff", true).build();
- Settings finalSettings = ImmutableSettings.settingsBuilder().put(defaultSettings)
- .put("name", NetworkUtils.getLocalAddress().getHostName()).build();
- TransportClient tmp = new TransportClient(finalSettings);
- Client client = tmp.addTransportAddress(new InetSocketTransportAddress("127.0.0.1", 9300));
- //demo 100万数据
- for (int i = 0; i < 1000000; i++)
- {
- client.prepareIndex("test2", "book",String.valueOf(i)).setSource("bookid", String.valueOf(i), "booktype", String.valueOf(i%10000)).execute()
- .actionGet();
- }
- //demo 近1万个term
- String[] values = new String[10000];
- for (int i = 1; i < 10000; i++)
- {
- values[i] = String.valueOf(i);
- }
- //terms query
- //TermsQueryBuilder termQueryBuilder = new TermsQueryBuilder("booktype", values);
- TermsFilterBuilder termsFilterBuilder = new TermsFilterBuilder("booktype", values);
- // SearchResponse searchResponse = client.prepareSearch().setIndices("test2").setQuery(termQueryBuilder)
- // .setFrom(0).setSize(100).execute().actionGet();
- //terms filter
- SearchResponse searchResponse = client.prepareSearch().setIndices("test2").setQuery(QueryBuilders.matchAllQuery()).setFilter(termsFilterBuilder)
- .setFrom(0).setSize(100).execute().actionGet();
- SearchHits hits = searchResponse.getHits();
- System.out.println(hits.totalHits());
- for (SearchHit searchHit : hits)
- {
- System.out.println(searchHit.getId() + ":" + searchHit.getSource().get("booktype"));
- }
上述结果会发现, 用TermsQueryBuilder查询的话, 会出现TooManyClauses的异常, 因为设置了9999个term值。因此,当term过多时,建议采用filter, 而不是query.
http://lucene-group.group.iteye.com/group/topic/10555
http://maxrocray.iteye.com/blog/1860946
相关推荐
### Lucene原理与代码分析概览 #### 一、全文检索基本原理 ...- Lucene中的TooManyClauses异常及其解决方法。 通过上述内容的学习,读者可以全面掌握Lucene的工作原理和技术细节,从而更好地应用于实际项目中。
5. **TooManyClause 异常**:当查询包含过多的子查询时,Lucene 会抛出 TooManyClauses 异常,以防止内存溢出等问题。 6. **Lucene 的事务性**:Lucene 不直接支持事务,但在设计上支持高并发环境下的安全操作,...
buct计算机系统综合设计课设
"半桥LLC谐振变换器:开环、闭环仿真设计与参数详解报告——含32页说明文档与Simulink模型",半桥LLC谐振变器开环+闭环仿真设计资料 附带一份32页配套说明文档详细参数设计、原理说明、仿真分析 开环、闭环、闭环+软启动三个Simulink仿真模型, 附带了配套设计说明报告,给出了谐振槽谐振元件参数的设计步骤,并对参数进行了完整设计。 可以参考说明文档设计不同功率等级的LLC谐振变器 ,半桥LLC谐振变换器; 开环仿真设计; 闭环仿真设计; 软启动仿真模型; 设计说明报告; 谐振槽谐振元件参数设计步骤。,"LLC谐振变换器:开环闭环仿真设计资料及详细参数说明"
证书_00008030-001079D21A53402E.zip
深入探索Comsol SOFC:固体氧化物燃料电池的仿真与性能分析(涉及温度场、气体分布及曲线表现),Comsol sofc固体氧化物燃料电池仿真(温度场分布,气体分布,极化曲线,性能曲线) ,Comsol; SOFC; 固体氧化物燃料电池; 仿真; 温度场分布; 气体分布; 极化曲线; 性能曲线,Comsol SOFC仿真:温度场与气体分布分析的极化性能曲线
学生竞赛管理系统 免费JAVA毕业设计 2024成品源码+论文+录屏+启动教程 启动教程:https://www.bilibili.com/video/BV1jKDjYrEz1 项目讲解视频:https://www.bilibili.com/video/BV1Tb421n72S 二次开发教程:https://www.bilibili.com/video/BV18i421i7Dx
deepseek最新技术文档中英文对照版
COMSOL模拟下的激光热致等离子体作用模型探究,Comsol模拟激光热致等离子体模型 ,Comsol模拟; 激光热致等离子体模型; 模型模拟。,Comsol模拟激光热等离子体模型
冲压成形虚拟仿 真实验系统构建与关键技术研究.pdf
Firefox浏览器page_assist-1.4.4离线插件,deepseep大模型一起用的WEBUI插件
《基于Comsol仿真的热光伏电池(TPV)研究》,comsol仿真:热光伏电池(TPV) #comsol仿真 ,comsol仿真;热光伏电池(TPV);光伏模拟,"Comsol仿真技术在热光伏电池(TPV)领域的应用"
"COMSOL有限元仿真模型:三相变压器多物理耦合模型中的电磁-声-结构力分析,涉及应力、磁密及声场多维研究",COMSOL有限元仿真模型,三相变压器电磁-声-结构力多物理耦合模型,应力分析,磁密分析,声场分析 ,核心关键词:COMSOL有限元仿真模型; 三相变压器; 电磁-声-结构力多物理耦合模型; 应力分析; 磁密分析; 声场分析; 关键词以分号分隔为:COMSOL有限元仿真模型;三相变压器;电磁耦合模型;声-结构力耦合模型;应力分析;磁密分析;声场分析。,有限元模型下的电磁-声-结构多物理耦合仿真研究
"灰狼算法驱动的MPPT跟踪:光照突变情况下的部分遮阴处理与波形图分析",灰狼算法实现部分遮阴的MPPT跟踪,包括光照突变情况,包括灰狼算法程序和matlab simulink模型的搭建,功率,电压,电流波形图和占空比波形图入如下。 ,核心关键词:灰狼算法; MPPT跟踪; 光照突变; 程序实现; MATLAB Simulink模型; 功率波形图; 电压波形图; 电流波形图; 占空比波形图。,灰狼算法MPPT跟踪与Simulink模型搭建
"BLDC无刷直流电机双闭环矢量控制仿真研究与实践:模型构建、性能验证与学习资源",BLDC无刷直流电机双闭环矢量控制仿真 仿真为BLDC无刷直流电机的双闭环控制,仿真配套的说明文档详细的描述的每个模块的搭建以及作用。 适合学习BLDC的同学使用学习。 仿真通过对给定转速的突变验证双闭环控制的性能,结果表明,控制相应速度快,转速跟踪稳定,能完美实现功能。 文件包括: [1]仿真模型 [2]详细说明文档 [3]相关参考文献 ,BLDC无刷直流电机;双闭环矢量控制;仿真;模块搭建;性能验证;学习;控制响应速度;转速跟踪稳定。,"BLDC无刷直流电机双闭环矢量控制仿真研究报告"
EB-PVD设备灯丝加热电源的研制.pdf
国外人形机器人技术前沿及产业发展形势研判.pdf
基于comsol技术的冻土路基水热耦合研究,comsol冻土路基水热耦合。 ,comsol; 冻土; 路基; 水热耦合,"水热耦合下的comsol冻土路基研究"
COMSOL模拟光子晶体六边形晶格结构:第一布里渊区能带简述,COMSOL光子晶体六边形晶格简约第一布里渊区能带 ,COMSOL;光子晶体;六边形晶格;简约;第一布里渊区;能带,光子晶体六边形晶格的能带研究
基于英飞凌XMC4500的永磁伺服电机液压伺服驱动器方案——高效稳定的注塑机控制解决方案,液压伺服驱动器,英飞凌XMC4500方案,为永磁伺服电机(PMSM)而开发的液压伺服方案,采用高性能的矢量控制技术,配置了针对注塑机驱动过程中的工艺过程动作特性的优化,如注塑速度、压力保持精度控制,以及与注塑机控制器配合工作时的平稳性控制,同时还具备后台软件监控、通讯总线功能,支持多种编码器类型,组合功能丰富强大,性能稳定。 此方案主要应用于塑料成型、管材挤出、制鞋、橡胶、金属压铸等行业,油压控制性能进一步提升、压力和速度响应更快,稳态压力波动更小,体积更小。 驱动板、键盘板、PG卡等)源文件,资料非常详细完整 ,液压伺服驱动器; 英飞凌XMC4500方案; 永磁伺服电机PMSM; 矢量控制技术; 注塑机驱动工艺; 速度压力控制; 通讯总线功能; 编码器类型; 塑料成型; 管材挤出; 制鞋; 橡胶; 金属压铸; 油压控制性能; 体积小。,"英飞凌XMC4500永磁伺服液压驱动方案:多行业应用的高效稳定驱动器"