`
gcgmh
  • 浏览: 354893 次
  • 性别: Icon_minigender_1
  • 来自: 北京
社区版块
存档分类
最新评论

solr源码类里的一些方法

    博客分类:
  • Solr
阅读更多
//SolrIndexSearcher.java

 /**
   * 获得docID的方法
   */
  private void getDocListC(QueryResult qr, QueryCommand cmd) throws IOException {
    // old parameters: DocListAndSet out, Query query, List<Query> filterList, DocSet filter, Sort lsort, int offset, int len, int flags, long timeAllowed, NamedList<Object> responseHeader
    DocListAndSet out = new DocListAndSet();
    qr.setDocListAndSet(out);
    QueryResultKey key=null;
    
    //request里传过来的要返回的document数目,默认是10条
    int maxDocRequested = cmd.getOffset() + cmd.getLen();
    
    // check for overflow, and check for # docs in index
    if (maxDocRequested < 0 || maxDocRequested > maxDoc()) maxDocRequested = maxDoc();
    int supersetMaxDoc= maxDocRequested;
    DocList superset;

    // we can try and look up the complete query in the cache.
    // we can't do that if filter!=null though (we don't want to
    // do hashCode() and equals() for a big DocSet).
    if (queryResultCache != null && cmd.getFilter()==null) {
        // all of the current flags can be reused during warming,
        // so set all of them on the cache key.
    	/**
    	 * 根据用户输入的查询关键字生成的key,作为存放到queryResultCache里面的Key
    	 */
        key = new QueryResultKey(cmd.getQuery(), cmd.getFilterList(), cmd.getSort(), cmd.getFlags());
        if ((cmd.getFlags() & NO_CHECK_QCACHE)==0) {
          superset = (DocList)queryResultCache.get(key);

          if (superset != null) {
            // check that the cache entry has scores recorded if we need them
            if ((cmd.getFlags() & GET_SCORES)==0 || superset.hasScores()) {
              // NOTE: subset() returns null if the DocList has fewer docs than
              // requested
              out.docList = superset.subset(cmd.getOffset(),cmd.getLen());
            }
          }
          if (out.docList != null) {
            // found the docList in the cache... now check if we need the docset too.
            // OPT: possible future optimization - if the doclist contains all the matches,
            // use it to make the docset instead of rerunning the query.
            if (out.docSet==null && ((cmd.getFlags() & GET_DOCSET)!=0) ) {
              if (cmd.getFilterList()==null) {
                out.docSet = getDocSet(cmd.getQuery());
              } else {
                List<Query> newList = new ArrayList<Query>(cmd.getFilterList()
.size()+1);
                newList.add(cmd.getQuery());
                newList.addAll(cmd.getFilterList());
                out.docSet = getDocSet(newList);
              }
            }
            return;
          }
        }

        // If we are going to generate the result, bump up to the
        // next resultWindowSize for better caching.

        // handle 0 special case as well as avoid idiv in the common case.
        if (maxDocRequested < queryResultWindowSize) {
          supersetMaxDoc=queryResultWindowSize;
        } else {
          supersetMaxDoc = ((maxDocRequested -1)/queryResultWindowSize + 1)*queryResultWindowSize;
          if (supersetMaxDoc < 0) supersetMaxDoc=maxDocRequested;
        }
    }


    // OK, so now we need to generate an answer.
    // One way to do that would be to check if we have an unordered list
    // of results for the base query.  If so, we can apply the filters and then
    // sort by the resulting set.  This can only be used if:
    // - the sort doesn't contain score
    // - we don't want score returned.

    // check if we should try and use the filter cache
    boolean useFilterCache=false;
    if ((cmd.getFlags() & (GET_SCORES|NO_CHECK_FILTERCACHE))==0 && useFilterForSortedQuery && cmd.getSort() != null && filterCache != null) {
      useFilterCache=true;
      SortField[] sfields = cmd.getSort().getSort();
      for (SortField sf : sfields) {
        if (sf.getType() == SortField.SCORE) {
          useFilterCache=false;
          break;
        }
      }
    }

    if (useFilterCache) {
      // now actually use the filter cache.
      // for large filters that match few documents, this may be
      // slower than simply re-executing the query.
      if (out.docSet == null) {
        out.docSet = getDocSet(cmd.getQuery(),cmd.getFilter());
        DocSet bigFilt = getDocSet(cmd.getFilterList());
        if (bigFilt != null) out.docSet = out.docSet.intersection(bigFilt);
      }
      // todo: there could be a sortDocSet that could take a list of
      // the filters instead of anding them first...
      // perhaps there should be a multi-docset-iterator
      superset = sortDocSet(out.docSet,cmd.getSort(),supersetMaxDoc);
      out.docList = superset.subset(cmd.getOffset(),cmd.getLen());
    } else {
      // do it the normal way...
      cmd.setSupersetMaxDoc(supersetMaxDoc);
      if ((cmd.getFlags() & GET_DOCSET)!=0) {
        DocSet qDocSet = getDocListAndSetNC(qr,cmd);
        // cache the docSet matching the query w/o filtering
        if (filterCache!=null && !qr.isPartialResults()) filterCache.put(cmd.getQuery(),qDocSet);
      } else {
    	  
    	  /**
    	   * 此方法获取documentID,存放在docListAndSet对象的docList里面,
    	   */
        getDocListNC(qr,cmd);
        //Parameters: cmd.getQuery(),theFilt,cmd.getSort(),0,supersetMaxDoc,cmd.getFlags(),cmd.getTimeAllowed(),responseHeader);
      }
      //
      superset = out.docList;
      out.docList = superset.subset(cmd.getOffset(),cmd.getLen());
    }

    // lastly, put the superset in the cache if the size is less than or equal
    // to queryResultMaxDocsCached
    
    if (key != null && superset.size() <= queryResultMaxDocsCached && !qr.isPartialResults()) {
    	
      //将用户查询的关键字产生的key,和根据这个关键字查询出的DocId集合保存到queryResultCache里面去
      queryResultCache.put(key, superset);
      
    }
  }

--------------------------------------------------------------------

  /**
   * 此方法获取documentID,存放在docListAndSet对象的docList里面,
   */
  private void getDocListNC(QueryResult qr,QueryCommand cmd) throws IOException {
    //Parameters: cmd.getQuery(),theFilt,cmd.getSort(),0,supersetMaxDoc,cmd.getFlags(),cmd.getTimeAllowed(),responseHeader);
    //Query query, DocSet filter, Sort lsort, int offset, int len, int flags, long timeAllowed, NamedList<Object> responseHeader
    DocSet filter = cmd.getFilter()!=null ? cmd.getFilter() : getDocSet(cmd.getFilterList());
    final long timeAllowed = cmd.getTimeAllowed();
    int len = cmd.getSupersetMaxDoc();
    int last = len;
    if (last < 0 || last > maxDoc()) last=maxDoc();
    final int lastDocRequested = last;
    int nDocsReturned;
    int totalHits;
    float maxScore;
    int[] ids;
    float[] scores;
    
    /**
     * 产生一个query
     */
    Query query = QueryUtils.makeQueryable(cmd.getQuery());

    // handle zero case...
    if (lastDocRequested<=0) {
      final DocSet filt = filter;
      final float[] topscore = new float[] { Float.NEGATIVE_INFINITY };
      final int[] numHits = new int[1];

      HitCollector hc = new HitCollector() {
        public void collect(int doc, float score) {
          if (filt!=null && !filt.exists(doc)) return;
          numHits[0]++;
          if (score > topscore[0]) topscore[0]=score;
        }
      };
      if( timeAllowed > 0 ) {
        hc = new TimeLimitedCollector( hc, timeAllowed );
      }
      try {
        searcher.search(query, hc );
      }
      catch( TimeLimitedCollector.TimeExceededException x ) {
        log.warning( "Query: " + query + "; " + x.getMessage() );
        qr.setPartialResults(true);
      }

      nDocsReturned=0;
      ids = new int[nDocsReturned];
      scores = new float[nDocsReturned];
      totalHits = numHits[0];
      maxScore = totalHits>0 ? topscore[0] : 0.0f;
    } else if (cmd.getSort() != null) {
      // can't use TopDocs if there is a sort since it
      // will do automatic score normalization.
      // NOTE: this changed late in Lucene 1.9

      final DocSet filt = filter;
      final int[] numHits = new int[1];
      final FieldSortedHitQueue hq = new FieldSortedHitQueue(reader, cmd.getSort().getSort(), len);

      HitCollector hc = new HitCollector() {
        public void collect(int doc, float score) {
          if (filt!=null && !filt.exists(doc)) return;
          numHits[0]++;
          hq.insert(new FieldDoc(doc, score));
        }
      };
      if( timeAllowed > 0 ) {
        hc = new TimeLimitedCollector( hc, timeAllowed );
      }
      try {
        searcher.search(query, hc );
      }
      catch( TimeLimitedCollector.TimeExceededException x ) {
        log.warning( "Query: " + query + "; " + x.getMessage() );
        qr.setPartialResults(true);
      }

      totalHits = numHits[0];	//总个数
      maxScore = totalHits>0 ? hq.getMaxScore() : 0.0f;

      nDocsReturned = hq.size();
      ids = new int[nDocsReturned];
      scores = (cmd.getFlags()&GET_SCORES)!=0 ? new float[nDocsReturned] : null;
      for (int i = nDocsReturned -1; i >= 0; i--) {
        FieldDoc fieldDoc = (FieldDoc)hq.pop();
        // fillFields is the point where score normalization happens
        // hq.fillFields(fieldDoc)
        ids[i] = fieldDoc.doc;
        if (scores != null) scores[i] = fieldDoc.score;
      }
    } else {
      // No Sort specified (sort by score descending)
      // This case could be done with TopDocs, but would currently require
      // getting a BitSet filter from a DocSet which may be inefficient.

      final DocSet filt = filter;
      final ScorePriorityQueue hq = new ScorePriorityQueue(lastDocRequested);
      final int[] numHits = new int[1];
      HitCollector hc = new HitCollector() {
        float minScore=Float.NEGATIVE_INFINITY;  // minimum score in the priority queue
        public void collect(int doc, float score) {
          if (filt!=null && !filt.exists(doc)) return;
          if (numHits[0]++ < lastDocRequested || score >= minScore) {
            // TODO: if docs are always delivered in order, we could use "score>minScore"
            // instead of "score>=minScore" and avoid tiebreaking scores
            // in the priority queue.
            // but might BooleanScorer14 might still be used and deliver docs out-of-order?
            hq.insert(new ScoreDoc(doc, score));
            minScore = ((ScoreDoc)hq.top()).score;
          }
        }
      };
      if( timeAllowed > 0 ) {
        hc = new TimeLimitedCollector( hc, timeAllowed );
      }
      try {
    	/**
    	 * 查询,把查询的结果放到hq里面
    	 */
        searcher.search(query, hc );
        
        
      }
      catch( TimeLimitedCollector.TimeExceededException x ) {
        log.warning( "Query: " + query + "; " + x.getMessage() );
        qr.setPartialResults(true);
      }

      totalHits = numHits[0];
      nDocsReturned = hq.size();
      ids = new int[nDocsReturned];
      scores = (cmd.getFlags()&GET_SCORES)!=0 ? new float[nDocsReturned] : null;
      ScoreDoc sdoc =null;
      for (int i = nDocsReturned -1; i >= 0; i--) {
        sdoc = (ScoreDoc)hq.pop();
        
        ids[i] = sdoc.doc;
        if (scores != null) scores[i] = sdoc.score;
      }
      maxScore = sdoc ==null ? 0.0f : sdoc.score;
    }


    int sliceLen = Math.min(lastDocRequested,nDocsReturned);
    if (sliceLen < 0) sliceLen=0;
    qr.setDocList(new DocSlice(0,sliceLen,ids,scores,totalHits,maxScore));



    /**************** older implementation using TopDocs *******************


      Filter lfilter=null;
      if (filter != null) {
        final BitSet bits = filter.getBits();   // avoid if possible
        lfilter = new Filter() {
          public BitSet bits(IndexReader reader)  {
            return bits;
          }
        };
      }

      int lastDocRequested=offset+len;

      // lucene doesn't allow 0 to be passed for nDocs
      if (lastDocRequested==0) lastDocRequested=1;

      // TopFieldDocs sortedDocs;  // use TopDocs so both versions can use it
      TopDocs sortedDocs;
      if (lsort!=null) {
         sortedDocs = searcher.search(query, lfilter, lastDocRequested, lsort);
      } else {
         sortedDocs = searcher.search(query, lfilter, lastDocRequested);
      }

      int nDocsReturned = sortedDocs.scoreDocs.length;
      int[] docs = new int[nDocsReturned];
      for (int i=0; i<nDocsReturned; i++) {
        docs[i] = sortedDocs.scoreDocs[i].doc;
      }
      float[] scores=null;
      float maxScore=0.0f;
      if ((flags & GET_SCORES) != 0) {
        scores = new float[nDocsReturned];
        for (int i=0; i<nDocsReturned; i++) {
          scores[i] = sortedDocs.scoreDocs[i].score;
        }
        if (nDocsReturned>0) {
          maxScore=sortedDocs.scoreDocs[0].score;
        }
      }
      int sliceLen = Math.min(offset+len,nDocsReturned) - offset;
      if (sliceLen < 0) sliceLen=0;
      return new DocSlice(offset,sliceLen,docs,scores,sortedDocs.totalHits, maxScore);

    **********************************************************************************/

  }






//SolrIndexSearcher.java

/**
   * Retrieve the {@link Document} instance corresponding to the document id.
   *
   * Note: The document will have all fields accessable, but if a field
   * filter is provided, only the provided fields will be loaded (the 
   * remainder will be available lazily).
   * 此方法从传入的docId号,到documentCached里面获取document,若没有获取到,这到索引里获取document,并加入到documentCached里面
   */
  public Document doc(int i, Set<String> fields) throws IOException {
	  
	  log.info("docId: " + i);
	  
    
    Document d;
    if (documentCache != null) {
      //重缓存里获取
      d = (Document)documentCache.get(i);
      if (d!=null) return d;  //获取到返回
    }

    if(!enableLazyFieldLoading || fields == null) {
      d = searcher.getIndexReader().document(i);
    } else {
      //直接到索引里获取document
      d = searcher.getIndexReader().document(i, 
             new SetNonLazyFieldSelector(fields));
    }

    if (documentCache != null) {
      //加入到documentCached缓存里面
      documentCache.put(i, d);
    }

    return d;
  }



分享到:
评论

相关推荐

    solr-6.2.0源码

    通过分析Solr 6.2.0的源码,我们可以深入了解其内部工作机制,包括索引构建、查询处理、分布式协调等核心模块。源码中包含了丰富的注释和示例,帮助开发者深入理解Solr的设计思想和实现细节。 总结来说,Solr 6.2.0...

    solr源码及文档

    solr全文检索,里面包含文档,源代码,jar包,使用的是solr4.2,东西比较全,安装文档就能跑起来,,适合参考借鉴

    lucene-solr源码,编译成的idea项目源码

    本人用ant idea命令花了214分钟,35秒编译的lucene-solr源码,可以用idea打开,把项目放在D:\space\study\java\lucene-solr路径下,再用idea打开就行了

    solr6.6.0源码

    接下来,我们将深入探讨 Solr 6.6.0 中的一些关键知识点。 一、Solr 架构与组件 Solr 的核心架构基于 Lucene 库,它提供了一个可扩展、高性能的搜索平台。主要组件包括: 1. **索引库**:Solr 使用 Lucene 来创建、...

    solr(solr-9.0.0-src.tgz)源码

    源码分析是深入理解一个软件系统工作原理的重要途径,对于Solr这样的复杂系统尤其如此。这里我们将围绕"solr-9.0.0-src.tgz"这个源码包,详细探讨其主要组成部分、核心功能以及开发过程中的关键知识点。 1. **Solr...

    Solr 源码包Part2

    Solr的安装部署包,只能分卷上传,稍后上传依赖jar包,及部署攻略

    solr 4.10源码

    4. **学习Solr源码** - **阅读源码**:从`src`目录开始,可以了解Solr的架构设计,比如RequestHandlers、QueryParsers是如何工作的。 - **理解索引过程**:查看`solr-core`模块中的`IndexWriter`和`UpdateHandler`...

    Solr项目源码及solr资源包

    Solr项目源码及solr资源包是一个针对搜索引擎平台Apache Solr的学习与实践资源集合,主要结合了Spring Data Solr框架进行操作。这个项目旨在帮助开发者更好地理解和运用Solr进行数据索引和检索。让我们详细地探讨...

    solr源码搜索引擎

    基于lucene的企业级搜索引擎。是一个独立的企业级搜索应用服务器,它对外提供类似于Web-service的API接口。...也可以通过Http G Solr et操作提出查找请求,并得到XML格式的返回结果。仅仅从官方提取的源码。

    Solr-search过程源码分析

    在深入探讨Solr-search过程的源码分析时,我们聚焦于关键步骤与核心组件,以求全面理解Solr搜索机制的内部运作。Solr作为一款高性能、可伸缩的开源搜索平台,其搜索处理流程涉及多个层次的组件交互与数据处理,其中...

    solr-4.5源码包

    在这个源码包中,我们可以深入理解Solr的工作原理以及其核心组件的实现。 首先,让我们了解Solr的基本架构。Solr基于Lucene库构建,Lucene是一个高性能、全文本检索库。Solr在其之上添加了分布式处理、集群管理、...

    solr5的ik中文分词器源码

    源码中会有相关的接口和类供开发者参考。 7. **打包成JAR**: 解压后的源码需要通过Maven或其他构建工具(如Gradle)进行编译和打包,生成的JAR文件才能被Solr识别并使用。这通常涉及`mvn clean package`命令。 8...

    solr-5.2.1-src.tgz源码

    Solr源码在MyEclipse下的搭建 1. 下载并按装Ant 下载地址: http://ant.apache.org/bindownload.cgi Ant环境变量配置: ANT_HOME: E:\Program Files\apache-ant-1.9.0 Path: %ANT_HOME%\bin 在cmd中输入ant -v...

    maven 搭建solr4.2源码环境

    maven 整合solr4.2环境,另外整合了solr-data-import源码环境,资源10分,十分不贵!有需要的朋友请下载吧。花了我3个小时的时间整理的。下载后,使用maven导入即可使用,升级solr版本也比较方便。本环境使用了...

    solr-5.2.1.part1.rar 编译第1部分,共2部分

    Solr源码在MyEclipse下的搭建 1. 下载并按装Ant 下载地址: http://ant.apache.org/bindownload.cgi Ant环境变量配置: ANT_HOME: E:\Program Files\apache-ant-1.9.0 Path: %ANT_HOME%\bin 在cmd中输入ant -v...

    SpringBoot整合Solr案例源码

    `saveProduct`方法用于将产品对象保存到Solr索引,`searchByKeyword`方法使用Solr的查询能力找到包含指定关键词的产品。 在`SpringBoot-Solr-demo`的源码中,你可能会看到一个完整的Spring Boot应用,包含了上述...

    毕业设计基于Springboot的个人博客系统源码,集成solr服务器.zip

    毕业设计基于Springboot的个人博客系统源码,集成solr服务器毕业设计基于Springboot的个人博客系统源码,集成solr服务器毕业设计基于Springboot的个人博客系统源码,集成solr服务器毕业设计基于Springboot的个人博客...

    solr更换memcached缓存的方法

    在更换缓存机制时,这些类可能会参与缓存的生成和查找过程,但具体实现取决于Solr的缓存接口和实现。 总之,通过集成Memcached,Solr能够利用更强大的分布式缓存能力,提升搜索性能,尤其是在高并发和大数据量场景...

    solr5.4.0完整包

    Solr 依存于Lucene,因为Solr底层的核心技术是使用Lucene 来实现的,Solr和Lucene的本质区别有以下三点:搜索服务器,企业级和管理。Lucene本质上是搜索库,不是独立的应用程序,而Solr是。Lucene专注于搜索底层的...

    solr6--solr-dataimporthandler-scheduler-1.1

    Solr是Apache Lucene项目下的一个企业级搜索服务器,它提供了全文检索、命中高亮、 faceted search(分面搜索)等多种功能...通过深入理解DIH的工作原理和配置方法,我们可以更好地利用这一工具来优化我们的Solr部署。

Global site tag (gtag.js) - Google Analytics