BroadcastShardOperationFailedException TooManyClauses[maxClauseCount is set to

m635674608

浏览: 5087724 次
性别:
来自: 南京

最近访客更多访客>>

wusuosuo

yijiaomuqing

millerchu

xdung

博主相关

博客

微博

相册

留言

关于我

文章分类

社区版块

存档分类

博客分类：

异常

luncen 查询条件不能大于1024

package org.apache.lucene.search;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Set;

import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.ToStringUtils;

/** A Query that matches documents matching boolean combinations of other
  * queries, e.g. {@link TermQuery}s, {@link PhraseQuery}s or other
  * BooleanQuerys.
  */
public class BooleanQuery extends Query implements Iterable<BooleanClause> {

  private static int maxClauseCount = 10240;

  /** Thrown when an attempt is made to add more than {@link
   * #getMaxClauseCount()} clauses. This typically happens if
   * a PrefixQuery, FuzzyQuery, WildcardQuery, or TermRangeQuery 
   * is expanded to many terms during search. 
   */
  public static class TooManyClauses extends RuntimeException {
    public TooManyClauses() {
      super("maxClauseCount is set to " + maxClauseCount);
    }
  }

  /** Return the maximum number of clauses permitted, 1024 by default.
   * Attempts to add more than the permitted number of clauses cause {@link
   * TooManyClauses} to be thrown.
   * @see #setMaxClauseCount(int)
   */
  public static int getMaxClauseCount() { return maxClauseCount; }

  /** 
   * Set the maximum number of clauses permitted per BooleanQuery.
   * Default value is 1024.
   */
  public static void setMaxClauseCount(int maxClauseCount) {
    if (maxClauseCount < 1) {
      throw new IllegalArgumentException("maxClauseCount must be >= 1");
    }
    BooleanQuery.maxClauseCount = maxClauseCount;
  }

  private ArrayList<BooleanClause> clauses = new ArrayList<BooleanClause>();
  private final boolean disableCoord;

  /** Constructs an empty boolean query. */
  public BooleanQuery() {
    disableCoord = false;
  }

  /** Constructs an empty boolean query.
   *
   * {@link Similarity#coord(int,int)} may be disabled in scoring, as
   * appropriate. For example, this score factor does not make sense for most
   * automatically generated queries, like {@link WildcardQuery} and {@link
   * FuzzyQuery}.
   *
   * @param disableCoord disables {@link Similarity#coord(int,int)} in scoring.
   */
  public BooleanQuery(boolean disableCoord) {
    this.disableCoord = disableCoord;
  }

  /** Returns true iff {@link Similarity#coord(int,int)} is disabled in
   * scoring for this query instance.
   * @see #BooleanQuery(boolean)
   */
  public boolean isCoordDisabled() { return disableCoord; }

  /**
   * Specifies a minimum number of the optional BooleanClauses
   * which must be satisfied.
   *
   * <p>
   * By default no optional clauses are necessary for a match
   * (unless there are no required clauses).  If this method is used,
   * then the specified number of clauses is required.
   * </p>
   * <p>
   * Use of this method is totally independent of specifying that
   * any specific clauses are required (or prohibited).  This number will
   * only be compared against the number of matching optional clauses.
   * </p>
   *
   * @param min the number of optional clauses that must match
   */
  public void setMinimumNumberShouldMatch(int min) {
    this.minNrShouldMatch = min;
  }
  protected int minNrShouldMatch = 0;

  /**
   * Gets the minimum number of the optional BooleanClauses
   * which must be satisfied.
   */
  public int getMinimumNumberShouldMatch() {
    return minNrShouldMatch;
  }

  /** Adds a clause to a boolean query.
   *
   * @throws TooManyClauses if the new number of clauses exceeds the maximum clause number
   * @see #getMaxClauseCount()
   */
  public void add(Query query, BooleanClause.Occur occur) {
    add(new BooleanClause(query, occur));
  }

  /** Adds a clause to a boolean query.
   * @throws TooManyClauses if the new number of clauses exceeds the maximum clause number
   * @see #getMaxClauseCount()
   */
  public void add(BooleanClause clause) {
    if (clauses.size() >= maxClauseCount) {
      throw new TooManyClauses();
    }

    clauses.add(clause);
  }

  /** Returns the set of clauses in this query. */
  public BooleanClause[] getClauses() {
    return clauses.toArray(new BooleanClause[clauses.size()]);
  }

  /** Returns the list of clauses in this query. */
  public List<BooleanClause> clauses() { return clauses; }

  /** Returns an iterator on the clauses in this query. It implements the {@link Iterable} interface to
   * make it possible to do:
   * <pre class="prettyprint">for (BooleanClause clause : booleanQuery) {}</pre>
   */
  @Override
  public final Iterator<BooleanClause> iterator() { return clauses().iterator(); }

  /**
   * Expert: the Weight for BooleanQuery, used to
   * normalize, score and explain these queries.
   *
   * <p>NOTE: this API and implementation is subject to
   * change suddenly in the next release.</p>
   */
  protected class BooleanWeight extends Weight {
    /** The Similarity implementation. */
    protected Similarity similarity;
    protected ArrayList<Weight> weights;
    protected int maxCoord;  // num optional + num required
    private final boolean disableCoord;

    public BooleanWeight(IndexSearcher searcher, boolean disableCoord)
      throws IOException {
      this.similarity = searcher.getSimilarity();
      this.disableCoord = disableCoord;
      weights = new ArrayList<Weight>(clauses.size());
      for (int i = 0 ; i < clauses.size(); i++) {
        BooleanClause c = clauses.get(i);
        Weight w = c.getQuery().createWeight(searcher);
        weights.add(w);
        if (!c.isProhibited()) {
          maxCoord++;
        }
      }
    }

    @Override
    public Query getQuery() { return BooleanQuery.this; }

    @Override
    public float getValueForNormalization() throws IOException {
      float sum = 0.0f;
      for (int i = 0 ; i < weights.size(); i++) {
        // call sumOfSquaredWeights for all clauses in case of side effects
        float s = weights.get(i).getValueForNormalization();         // sum sub weights
        if (!clauses.get(i).isProhibited()) {
          // only add to sum for non-prohibited clauses
          sum += s;
        }
      }

      sum *= getBoost() * getBoost();             // boost each sub-weight

      return sum ;
    }

    public float coord(int overlap, int maxOverlap) {
      // LUCENE-4300: in most cases of maxOverlap=1, BQ rewrites itself away,
      // so coord() is not applied. But when BQ cannot optimize itself away
      // for a single clause (minNrShouldMatch, prohibited clauses, etc), its
      // important not to apply coord(1,1) for consistency, it might not be 1.0F
      return maxOverlap == 1 ? 1F : similarity.coord(overlap, maxOverlap);
    }

    @Override
    public void normalize(float norm, float topLevelBoost) {
      topLevelBoost *= getBoost();                         // incorporate boost
      for (Weight w : weights) {
        // normalize all clauses, (even if prohibited in case of side affects)
        w.normalize(norm, topLevelBoost);
      }
    }

    @Override
    public Explanation explain(AtomicReaderContext context, int doc)
      throws IOException {
      final int minShouldMatch =
        BooleanQuery.this.getMinimumNumberShouldMatch();
      ComplexExplanation sumExpl = new ComplexExplanation();
      sumExpl.setDescription("sum of:");
      int coord = 0;
      float sum = 0.0f;
      boolean fail = false;
      int shouldMatchCount = 0;
      Iterator<BooleanClause> cIter = clauses.iterator();
      for (Iterator<Weight> wIter = weights.iterator(); wIter.hasNext();) {
        Weight w = wIter.next();
        BooleanClause c = cIter.next();
        if (w.scorer(context, true, true, context.reader().getLiveDocs()) == null) {
          if (c.isRequired()) {
            fail = true;
            Explanation r = new Explanation(0.0f, "no match on required clause (" + c.getQuery().toString() + ")");
            sumExpl.addDetail(r);
          }
          continue;
        }
        Explanation e = w.explain(context, doc);
        if (e.isMatch()) {
          if (!c.isProhibited()) {
            sumExpl.addDetail(e);
            sum += e.getValue();
            coord++;
          } else {
            Explanation r =
              new Explanation(0.0f, "match on prohibited clause (" + c.getQuery().toString() + ")");
            r.addDetail(e);
            sumExpl.addDetail(r);
            fail = true;
          }
          if (c.getOccur() == Occur.SHOULD) {
            shouldMatchCount++;
          }
        } else if (c.isRequired()) {
          Explanation r = new Explanation(0.0f, "no match on required clause (" + c.getQuery().toString() + ")");
          r.addDetail(e);
          sumExpl.addDetail(r);
          fail = true;
        }
      }
      if (fail) {
        sumExpl.setMatch(Boolean.FALSE);
        sumExpl.setValue(0.0f);
        sumExpl.setDescription
          ("Failure to meet condition(s) of required/prohibited clause(s)");
        return sumExpl;
      } else if (shouldMatchCount < minShouldMatch) {
        sumExpl.setMatch(Boolean.FALSE);
        sumExpl.setValue(0.0f);
        sumExpl.setDescription("Failure to match minimum number "+
                               "of optional clauses: " + minShouldMatch);
        return sumExpl;
      }
      
      sumExpl.setMatch(0 < coord ? Boolean.TRUE : Boolean.FALSE);
      sumExpl.setValue(sum);
      
      final float coordFactor = disableCoord ? 1.0f : coord(coord, maxCoord);
      if (coordFactor == 1.0f) {
        return sumExpl;                             // eliminate wrapper
      } else {
        ComplexExplanation result = new ComplexExplanation(sumExpl.isMatch(),
                                                           sum*coordFactor,
                                                           "product of:");
        result.addDetail(sumExpl);
        result.addDetail(new Explanation(coordFactor,
                                         "coord("+coord+"/"+maxCoord+")"));
        return result;
      }
    }

    @Override
    public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder,
        boolean topScorer, Bits acceptDocs)
        throws IOException {
      List<Scorer> required = new ArrayList<Scorer>();
      List<Scorer> prohibited = new ArrayList<Scorer>();
      List<Scorer> optional = new ArrayList<Scorer>();
      Iterator<BooleanClause> cIter = clauses.iterator();
      for (Weight w  : weights) {
        BooleanClause c =  cIter.next();
        Scorer subScorer = w.scorer(context, true, false, acceptDocs);
        if (subScorer == null) {
          if (c.isRequired()) {
            return null;
          }
        } else if (c.isRequired()) {
          required.add(subScorer);
        } else if (c.isProhibited()) {
          prohibited.add(subScorer);
        } else {
          optional.add(subScorer);
        }
      }

      // NOTE: we could also use BooleanScorer, if we knew
      // this BooleanQuery was embedded in another
      // BooleanQuery that was also using BooleanScorer (ie,
      // BooleanScorer can nest).  But this is hard to
      // detect and we never do so today... (ie, we only
      // return BooleanScorer for topScorer):

      // Check if we can and should return a BooleanScorer
      // TODO: (LUCENE-4872) in some cases BooleanScorer may be faster for minNrShouldMatch
      // but the same is even true of pure conjunctions...
      if (!scoreDocsInOrder && topScorer && required.size() == 0 && minNrShouldMatch <= 1) {
        return new BooleanScorer(this, disableCoord, minNrShouldMatch, optional, prohibited, maxCoord);
      }
      
      if (required.size() == 0 && optional.size() == 0) {
        // no required and optional clauses.
        return null;
      } else if (optional.size() < minNrShouldMatch) {
        // either >1 req scorer, or there are 0 req scorers and at least 1
        // optional scorer. Therefore if there are not enough optional scorers
        // no documents will be matched by the query
        return null;
      }
      
      // simple conjunction
      if (optional.size() == 0 && prohibited.size() == 0) {
        float coord = disableCoord ? 1.0f : coord(required.size(), maxCoord);
        return new ConjunctionScorer(this, required.toArray(new Scorer[required.size()]), coord);
      }
      
      // simple disjunction
      if (required.size() == 0 && prohibited.size() == 0 && minNrShouldMatch <= 1 && optional.size() > 1) {
        float coord[] = new float[optional.size()+1];
        for (int i = 0; i < coord.length; i++) {
          coord[i] = disableCoord ? 1.0f : coord(i, maxCoord);
        }
        return new DisjunctionSumScorer(this, optional.toArray(new Scorer[optional.size()]), coord);
      }
      
      // Return a BooleanScorer2
      return new BooleanScorer2(this, disableCoord, minNrShouldMatch, required, prohibited, optional, maxCoord);
    }
    
    @Override
    public boolean scoresDocsOutOfOrder() {
      for (BooleanClause c : clauses) {
        if (c.isRequired()) {
          return false; // BS2 (in-order) will be used by scorer()
        }
      }
      
      // scorer() will return an out-of-order scorer if requested.
      return true;
    }
    
  }

  @Override
  public Weight createWeight(IndexSearcher searcher) throws IOException {
    return new BooleanWeight(searcher, disableCoord);
  }

  @Override
  public Query rewrite(IndexReader reader) throws IOException {
    if (minNrShouldMatch == 0 && clauses.size() == 1) {                    // optimize 1-clause queries
      BooleanClause c = clauses.get(0);
      if (!c.isProhibited()) {  // just return clause

        Query query = c.getQuery().rewrite(reader);    // rewrite first

        if (getBoost() != 1.0f) {                 // incorporate boost
          if (query == c.getQuery()) {                   // if rewrite was no-op
            query = query.clone();         // then clone before boost
          }
          // Since the BooleanQuery only has 1 clause, the BooleanQuery will be
          // written out. Therefore the rewritten Query's boost must incorporate both
          // the clause's boost, and the boost of the BooleanQuery itself
          query.setBoost(getBoost() * query.getBoost());
        }

        return query;
      }
    }

    BooleanQuery clone = null;                    // recursively rewrite
    for (int i = 0 ; i < clauses.size(); i++) {
      BooleanClause c = clauses.get(i);
      Query query = c.getQuery().rewrite(reader);
      if (query != c.getQuery()) {                     // clause rewrote: must clone
        if (clone == null) {
          // The BooleanQuery clone is lazily initialized so only initialize
          // it if a rewritten clause differs from the original clause (and hasn't been
          // initialized already).  If nothing differs, the clone isn't needlessly created
          clone = this.clone();
        }
        clone.clauses.set(i, new BooleanClause(query, c.getOccur()));
      }
    }
    if (clone != null) {
      return clone;                               // some clauses rewrote
    } else {
      return this;                                // no clauses rewrote
    }
  }

  // inherit javadoc
  @Override
  public void extractTerms(Set<Term> terms) {
    for (BooleanClause clause : clauses) {
      if (clause.getOccur() != Occur.MUST_NOT) {
        clause.getQuery().extractTerms(terms);
      }
    }
  }

  @Override @SuppressWarnings("unchecked")
  public BooleanQuery clone() {
    BooleanQuery clone = (BooleanQuery)super.clone();
    clone.clauses = (ArrayList<BooleanClause>) this.clauses.clone();
    return clone;
  }

  /** Prints a user-readable version of this query. */
  @Override
  public String toString(String field) {
    StringBuilder buffer = new StringBuilder();
    boolean needParens= getBoost() != 1.0 || getMinimumNumberShouldMatch() > 0;
    if (needParens) {
      buffer.append("(");
    }

    for (int i = 0 ; i < clauses.size(); i++) {
      BooleanClause c = clauses.get(i);
      if (c.isProhibited()) {
        buffer.append("-");
      } else if (c.isRequired()) {
        buffer.append("+");
      }

      Query subQuery = c.getQuery();
      if (subQuery != null) {
        if (subQuery instanceof BooleanQuery) {  // wrap sub-bools in parens
          buffer.append("(");
          buffer.append(subQuery.toString(field));
          buffer.append(")");
        } else {
          buffer.append(subQuery.toString(field));
        }
      } else {
        buffer.append("null");
      }

      if (i != clauses.size()-1) {
        buffer.append(" ");
      }
    }

    if (needParens) {
      buffer.append(")");
    }

    if (getMinimumNumberShouldMatch()>0) {
      buffer.append('~');
      buffer.append(getMinimumNumberShouldMatch());
    }

    if (getBoost() != 1.0f) {
      buffer.append(ToStringUtils.boost(getBoost()));
    }

    return buffer.toString();
  }

  /** Returns true iff <code>o</code> is equal to this. */
  @Override
  public boolean equals(Object o) {
    if (!(o instanceof BooleanQuery)) {
      return false;
    }
    BooleanQuery other = (BooleanQuery)o;
    return this.getBoost() == other.getBoost()
        && this.clauses.equals(other.clauses)
        && this.getMinimumNumberShouldMatch() == other.getMinimumNumberShouldMatch()
        && this.disableCoord == other.disableCoord;
  }

  /** Returns a hash code value for this object.*/
  @Override
  public int hashCode() {
    return Float.floatToIntBits(getBoost()) ^ clauses.hashCode()
      + getMinimumNumberShouldMatch() + (disableCoord ? 17:0);
  }
  
}

下面是在网上看到得一个解决方法：

可以通过设置：
BooleanQuery.setMaxClauseCount(10000);
来解决问题，但是这样带来的问题是会使得内存开销加大。容易出现OutOfMemory的异常所以需要非常谨慎处理。

Lucene在做大量term值查询时，如果这值过多，超1024个term的话，会出现

TooManyClauses[maxClauseCount is set to 1024] 的异常，因此建议在term过多的情况下采用filter, 而不是query。

以下是该情形在ES中的测试。

Java代码  
Settings defaultSettings = ImmutableSettings.settingsBuilder().put("client.transport.sniff", true).build();  
        Settings finalSettings = ImmutableSettings.settingsBuilder().put(defaultSettings)  
                .put("name", NetworkUtils.getLocalAddress().getHostName()).build();  
        TransportClient tmp = new TransportClient(finalSettings);  
        Client client = tmp.addTransportAddress(new InetSocketTransportAddress("127.0.0.1", 9300));  
        //demo 100万数据  
        for (int i = 0; i < 1000000; i++)  
        {  
            client.prepareIndex("test2", "book",String.valueOf(i)).setSource("bookid", String.valueOf(i), "booktype", String.valueOf(i%10000)).execute()  
            .actionGet();  
        }  
        //demo 近1万个term  
        String[] values = new String[10000];  
        for (int i = 1; i < 10000; i++)  
        {  
            values[i] = String.valueOf(i);  
        }  
        //terms query  
        //TermsQueryBuilder termQueryBuilder = new TermsQueryBuilder("booktype", values);  
        TermsFilterBuilder termsFilterBuilder = new TermsFilterBuilder("booktype", values);  
//      SearchResponse searchResponse = client.prepareSearch().setIndices("test2").setQuery(termQueryBuilder)  
//              .setFrom(0).setSize(100).execute().actionGet();  
        //terms filter  
        SearchResponse searchResponse = client.prepareSearch().setIndices("test2").setQuery(QueryBuilders.matchAllQuery()).setFilter(termsFilterBuilder)  
                .setFrom(0).setSize(100).execute().actionGet();  
        SearchHits hits = searchResponse.getHits();  
        System.out.println(hits.totalHits());  
        for (SearchHit searchHit : hits)  
        {  
            System.out.println(searchHit.getId() + ":" + searchHit.getSource().get("booktype"));  
        }  

上述结果会发现，用TermsQueryBuilder查询的话，会出现TooManyClauses的异常，因为设置了9999个term值。因此，当term过多时，建议采用filter，而不是query.

http://lucene-group.group.iteye.com/group/topic/10555

http://maxrocray.iteye.com/blog/1860946

分享到：

BroadcastShardOperationFailedException[[ ... | java 交集性能测试

2015-12-01 17:42
浏览 3582
评论(0)
分类:编程语言
查看更多

发表评论

您还没有登录,请您登录后再发表评论

最近访客更多访客>>

博主相关

文章分类

社区版块

存档分类

最新评论