//SolrIndexSearcher.java
/**
 * Obtains the document ids for a query command, going through the query result cache
 * when it is available.
 *
 * <p>A new {@code DocListAndSet} is attached to {@code qr}. If a usable superset is found in
 * {@code queryResultCache}, the requested window is sliced out of it and the method returns
 * early. Otherwise the superset is computed (either by sorting a DocSet obtained through the
 * filter cache, or by running the query via {@code getDocListNC} / {@code getDocListAndSetNC}),
 * sliced, and finally stored in the cache when it is small enough and complete.
 *
 * @param qr  result holder that receives the DocListAndSet
 * @param cmd the query, filters, sort, paging window and flags
 * @throws IOException propagated from the underlying index access
 */
private void getDocListC(QueryResult qr, QueryCommand cmd) throws IOException {
  // old parameters: DocListAndSet out, Query query, List<Query> filterList, DocSet filter, Sort lsort, int offset, int len, int flags, long timeAllowed, NamedList<Object> responseHeader
  DocListAndSet result = new DocListAndSet();
  qr.setDocListAndSet(result);

  // Number of documents the request needs to see (offset + length); the original note
  // says the request asks for 10 documents by default.
  int requested = cmd.getOffset() + cmd.getLen();
  // check for overflow, and check for # docs in index
  if (requested < 0 || requested > maxDoc()) {
    requested = maxDoc();
  }
  int supersetSize = requested;
  QueryResultKey cacheKey = null;

  // we can try and look up the complete query in the cache.
  // we can't do that if filter!=null though (we don't want to
  // do hashCode() and equals() for a big DocSet).
  if (queryResultCache != null && cmd.getFilter() == null) {
    // all of the current flags can be reused during warming,
    // so set all of them on the cache key.
    // This key, built from the user's query, is what the entry is stored under
    // in queryResultCache.
    cacheKey = new QueryResultKey(cmd.getQuery(), cmd.getFilterList(), cmd.getSort(), cmd.getFlags());

    if ((cmd.getFlags() & NO_CHECK_QCACHE) == 0) {
      DocList cachedSuperset = (DocList) queryResultCache.get(cacheKey);
      // The cache entry is only usable if it has scores recorded when we need them.
      if (cachedSuperset != null
          && ((cmd.getFlags() & GET_SCORES) == 0 || cachedSuperset.hasScores())) {
        // NOTE: subset() returns null if the DocList has fewer docs than
        // requested
        result.docList = cachedSuperset.subset(cmd.getOffset(), cmd.getLen());
      }

      if (result.docList != null) {
        // found the docList in the cache... now check if we need the docset too.
        // OPT: possible future optimization - if the doclist contains all the matches,
        // use it to make the docset instead of rerunning the query.
        boolean docSetWanted = result.docSet == null && (cmd.getFlags() & GET_DOCSET) != 0;
        if (docSetWanted) {
          if (cmd.getFilterList() != null) {
            // Query first, then every filter query.
            List<Query> queryPlusFilters = new ArrayList<Query>(cmd.getFilterList().size() + 1);
            queryPlusFilters.add(cmd.getQuery());
            queryPlusFilters.addAll(cmd.getFilterList());
            result.docSet = getDocSet(queryPlusFilters);
          } else {
            result.docSet = getDocSet(cmd.getQuery());
          }
        }
        return;
      }
    }

    // If we are going to generate the result, bump up to the
    // next resultWindowSize for better caching.
    // handle 0 special case as well as avoid idiv in the common case.
    if (requested < queryResultWindowSize) {
      supersetSize = queryResultWindowSize;
    } else {
      supersetSize = ((requested - 1) / queryResultWindowSize + 1) * queryResultWindowSize;
      if (supersetSize < 0) {
        supersetSize = requested;
      }
    }
  }

  // OK, so now we need to generate an answer.
  // One way to do that would be to check if we have an unordered list
  // of results for the base query. If so, we can apply the filters and then
  // sort by the resulting set. This can only be used if:
  // - the sort doesn't contain score
  // - we don't want score returned.

  // check if we should try and use the filter cache
  boolean sortViaFilterCache =
      (cmd.getFlags() & (GET_SCORES | NO_CHECK_FILTERCACHE)) == 0
      && useFilterForSortedQuery
      && cmd.getSort() != null
      && filterCache != null;
  if (sortViaFilterCache) {
    for (SortField sortField : cmd.getSort().getSort()) {
      if (sortField.getType() == SortField.SCORE) {
        sortViaFilterCache = false;
        break;
      }
    }
  }

  final DocList superset;
  if (sortViaFilterCache) {
    // now actually use the filter cache.
    // for large filters that match few documents, this may be
    // slower than simply re-executing the query.
    if (result.docSet == null) {
      result.docSet = getDocSet(cmd.getQuery(), cmd.getFilter());
      DocSet filterListSet = getDocSet(cmd.getFilterList());
      if (filterListSet != null) {
        result.docSet = result.docSet.intersection(filterListSet);
      }
    }
    // todo: there could be a sortDocSet that could take a list of
    // the filters instead of anding them first...
    // perhaps there should be a multi-docset-iterator
    superset = sortDocSet(result.docSet, cmd.getSort(), supersetSize);
  } else {
    // do it the normal way...
    cmd.setSupersetMaxDoc(supersetSize);
    if ((cmd.getFlags() & GET_DOCSET) == 0) {
      // Fetches the document ids; they end up in the docList of the DocListAndSet.
      getDocListNC(qr, cmd);
      //Parameters: cmd.getQuery(),theFilt,cmd.getSort(),0,supersetMaxDoc,cmd.getFlags(),cmd.getTimeAllowed(),responseHeader);
    } else {
      DocSet queryDocSet = getDocListAndSetNC(qr, cmd);
      // cache the docSet matching the query w/o filtering
      if (filterCache != null && !qr.isPartialResults()) {
        filterCache.put(cmd.getQuery(), queryDocSet);
      }
    }
    superset = result.docList;
  }
  // Both paths hand back only the requested window of the superset.
  result.docList = superset.subset(cmd.getOffset(), cmd.getLen());

  // lastly, put the superset in the cache if the size is less than or equal
  // to queryResultMaxDocsCached
  if (cacheKey != null && superset.size() <= queryResultMaxDocsCached && !qr.isPartialResults()) {
    // Store the key generated from the user's query together with the doc ids found for it.
    queryResultCache.put(cacheKey, superset);
  }
}
--------------------------------------------------------------------
/**
 * Fetches the document ids for the command (bypassing the query result cache) and stores
 * them as the DocList of {@code qr}.
 *
 * <p>Three strategies are used depending on the command: a count-only pass when no documents
 * are requested, a {@code FieldSortedHitQueue} when a sort is present, and a
 * {@code ScorePriorityQueue} (score descending) otherwise.
 *
 * @param qr  receives the resulting {@code DocSlice}; flagged as partial if the time limit is hit
 * @param cmd the query, filters, sort, superset size, flags and time allowance
 * @throws IOException propagated from the search
 */
private void getDocListNC(QueryResult qr, QueryCommand cmd) throws IOException {
  //Parameters: cmd.getQuery(),theFilt,cmd.getSort(),0,supersetMaxDoc,cmd.getFlags(),cmd.getTimeAllowed(),responseHeader);
  //Query query, DocSet filter, Sort lsort, int offset, int len, int flags, long timeAllowed, NamedList<Object> responseHeader

  // Use the explicit DocSet filter when there is one, otherwise the DocSet for the filter list.
  final DocSet activeFilter;
  if (cmd.getFilter() != null) {
    activeFilter = cmd.getFilter();
  } else {
    activeFilter = getDocSet(cmd.getFilterList());
  }

  final long timeAllowed = cmd.getTimeAllowed();
  final int supersetLen = cmd.getSupersetMaxDoc();
  // Clamp to the index size; a negative value also falls back to maxDoc().
  final int lastDocRequested =
      (supersetLen < 0 || supersetLen > maxDoc()) ? maxDoc() : supersetLen;

  final int nDocsReturned;
  final int totalHits;
  final float maxScore;
  final int[] ids;
  final float[] scores;

  // Build the query that is actually executed.
  final Query query = QueryUtils.makeQueryable(cmd.getQuery());

  if (lastDocRequested <= 0) {
    // handle zero case... only the hit count and the best score are tracked.
    final int[] hitCount = new int[1];
    final float[] bestScore = { Float.NEGATIVE_INFINITY };
    HitCollector counter = new HitCollector() {
      public void collect(int doc, float score) {
        if (activeFilter == null || activeFilter.exists(doc)) {
          hitCount[0]++;
          if (score > bestScore[0]) {
            bestScore[0] = score;
          }
        }
      }
    };
    searchHonoringTimeLimit(query, counter, timeAllowed, qr);

    nDocsReturned = 0;
    ids = new int[0];
    scores = new float[0];
    totalHits = hitCount[0];
    maxScore = totalHits > 0 ? bestScore[0] : 0.0f;
  } else if (cmd.getSort() != null) {
    // can't use TopDocs if there is a sort since it
    // will do automatic score normalization.
    // NOTE: this changed late in Lucene 1.9
    final int[] hitCount = new int[1];
    final FieldSortedHitQueue sortedQueue =
        new FieldSortedHitQueue(reader, cmd.getSort().getSort(), supersetLen);
    HitCollector sortingCollector = new HitCollector() {
      public void collect(int doc, float score) {
        if (activeFilter == null || activeFilter.exists(doc)) {
          hitCount[0]++;
          sortedQueue.insert(new FieldDoc(doc, score));
        }
      }
    };
    searchHonoringTimeLimit(query, sortingCollector, timeAllowed, qr);

    totalHits = hitCount[0]; // total number of hits
    maxScore = totalHits > 0 ? sortedQueue.getMaxScore() : 0.0f;
    nDocsReturned = sortedQueue.size();
    ids = new int[nDocsReturned];
    scores = (cmd.getFlags() & GET_SCORES) != 0 ? new float[nDocsReturned] : null;
    // Entries are popped from the queue and written from the back of the arrays forward.
    int slot = nDocsReturned;
    while (slot-- > 0) {
      FieldDoc popped = (FieldDoc) sortedQueue.pop();
      // fillFields is the point where score normalization happens
      // hq.fillFields(fieldDoc)
      ids[slot] = popped.doc;
      if (scores != null) {
        scores[slot] = popped.score;
      }
    }
  } else {
    // No Sort specified (sort by score descending)
    // This case could be done with TopDocs, but would currently require
    // getting a BitSet filter from a DocSet which may be inefficient.
    final ScorePriorityQueue scoreQueue = new ScorePriorityQueue(lastDocRequested);
    final int[] hitCount = new int[1];
    HitCollector scoringCollector = new HitCollector() {
      float minScore = Float.NEGATIVE_INFINITY; // minimum score in the priority queue

      public void collect(int doc, float score) {
        if (activeFilter != null && !activeFilter.exists(doc)) {
          return;
        }
        if (hitCount[0]++ < lastDocRequested || score >= minScore) {
          // TODO: if docs are always delivered in order, we could use "score>minScore"
          // instead of "score>=minScore" and avoid tiebreaking scores
          // in the priority queue.
          // but might BooleanScorer14 might still be used and deliver docs out-of-order?
          scoreQueue.insert(new ScoreDoc(doc, score));
          minScore = ((ScoreDoc) scoreQueue.top()).score;
        }
      }
    };
    // Run the query; the hits are gathered into scoreQueue.
    searchHonoringTimeLimit(query, scoringCollector, timeAllowed, qr);

    totalHits = hitCount[0];
    nDocsReturned = scoreQueue.size();
    ids = new int[nDocsReturned];
    scores = (cmd.getFlags() & GET_SCORES) != 0 ? new float[nDocsReturned] : null;
    ScoreDoc lastPopped = null;
    int slot = nDocsReturned;
    while (slot-- > 0) {
      lastPopped = (ScoreDoc) scoreQueue.pop();
      ids[slot] = lastPopped.doc;
      if (scores != null) {
        scores[slot] = lastPopped.score;
      }
    }
    // The last entry popped lands in slot 0; its score is reported as the maximum.
    maxScore = lastPopped != null ? lastPopped.score : 0.0f;
  }

  final int sliceLen = Math.max(0, Math.min(lastDocRequested, nDocsReturned));
  qr.setDocList(new DocSlice(0, sliceLen, ids, scores, totalHits, maxScore));
}

/**
 * Runs {@code query} against the searcher with {@code collector}, wrapping the collector in a
 * {@code TimeLimitedCollector} when {@code timeAllowed} is positive. If the time limit is
 * exceeded, a warning is logged and {@code qr} is marked as holding partial results.
 */
private void searchHonoringTimeLimit(Query query, HitCollector collector, long timeAllowed,
                                     QueryResult qr) throws IOException {
  HitCollector effective = collector;
  if (timeAllowed > 0) {
    effective = new TimeLimitedCollector(effective, timeAllowed);
  }
  try {
    searcher.search(query, effective);
  } catch (TimeLimitedCollector.TimeExceededException x) {
    log.warning( "Query: " + query + "; " + x.getMessage() );
    qr.setPartialResults(true);
  }
}
//SolrIndexSearcher.java
/**
 * Retrieve the {@link Document} instance corresponding to the document id.
 *
 * Note: The document will have all fields accessible, but if a field
 * filter is provided, only the provided fields will be loaded (the
 * remainder will be available lazily).
 *
 * The document is first looked up in documentCache by its id; when it is not found there,
 * it is read from the index and then added to documentCache.
 *
 * @param i      the document id
 * @param fields the fields to load eagerly when lazy field loading is enabled; may be null
 * @return the cached or freshly loaded document
 * @throws IOException propagated from the index reader
 */
public Document doc(int i, Set<String> fields) throws IOException {
  log.info("docId: " + i);

  // Try the cache first and return straight away on a hit.
  if (documentCache != null) {
    Document cached = (Document) documentCache.get(i);
    if (cached != null) {
      return cached;
    }
  }

  // Cache miss (or no cache): read the document directly from the index.
  final Document loaded;
  if (enableLazyFieldLoading && fields != null) {
    loaded = searcher.getIndexReader().document(i, new SetNonLazyFieldSelector(fields));
  } else {
    loaded = searcher.getIndexReader().document(i);
  }

  // Remember the document in documentCache.
  if (documentCache != null) {
    documentCache.put(i, loaded);
  }
  return loaded;
}
分享到:
相关推荐
通过分析Solr 6.2.0的源码,我们可以深入了解其内部工作机制,包括索引构建、查询处理、分布式协调等核心模块。源码中包含了丰富的注释和示例,帮助开发者深入理解Solr的设计思想和实现细节。 总结来说,Solr 6.2.0...
solr全文检索,里面包含文档,源代码,jar包,使用的是solr4.2,东西比较全,按照文档就能跑起来,适合参考借鉴
本人用ant idea命令花了214分钟,35秒编译的lucene-solr源码,可以用idea打开,把项目放在D:\space\study\java\lucene-solr路径下,再用idea打开就行了
接下来,我们将深入探讨 Solr 6.6.0 中的一些关键知识点。 一、Solr 架构与组件 Solr 的核心架构基于 Lucene 库,它提供了一个可扩展、高性能的搜索平台。主要组件包括: 1. **索引库**:Solr 使用 Lucene 来创建、...
源码分析是深入理解一个软件系统工作原理的重要途径,对于Solr这样的复杂系统尤其如此。这里我们将围绕"solr-9.0.0-src.tgz"这个源码包,详细探讨其主要组成部分、核心功能以及开发过程中的关键知识点。 1. **Solr...
Solr的安装部署包,只能分卷上传,稍后上传依赖jar包,及部署攻略
4. **学习Solr源码** - **阅读源码**:从`src`目录开始,可以了解Solr的架构设计,比如RequestHandlers、QueryParsers是如何工作的。 - **理解索引过程**:查看`solr-core`模块中的`IndexWriter`和`UpdateHandler`...
Solr项目源码及solr资源包是一个针对搜索引擎平台Apache Solr的学习与实践资源集合,主要结合了Spring Data Solr框架进行操作。这个项目旨在帮助开发者更好地理解和运用Solr进行数据索引和检索。让我们详细地探讨...
基于lucene的企业级搜索引擎。是一个独立的企业级搜索应用服务器,它对外提供类似于Web-service的API接口。...也可以通过Http Get操作提出查找请求,并得到XML格式的返回结果。仅仅从官方提取的源码。
在深入探讨Solr-search过程的源码分析时,我们聚焦于关键步骤与核心组件,以求全面理解Solr搜索机制的内部运作。Solr作为一款高性能、可伸缩的开源搜索平台,其搜索处理流程涉及多个层次的组件交互与数据处理,其中...
在这个源码包中,我们可以深入理解Solr的工作原理以及其核心组件的实现。 首先,让我们了解Solr的基本架构。Solr基于Lucene库构建,Lucene是一个高性能、全文本检索库。Solr在其之上添加了分布式处理、集群管理、...
源码中会有相关的接口和类供开发者参考。 7. **打包成JAR**: 解压后的源码需要通过Maven或其他构建工具(如Gradle)进行编译和打包,生成的JAR文件才能被Solr识别并使用。这通常涉及`mvn clean package`命令。 8...
Solr源码在MyEclipse下的搭建 1. 下载并安装Ant 下载地址: http://ant.apache.org/bindownload.cgi Ant环境变量配置: ANT_HOME: E:\Program Files\apache-ant-1.9.0 Path: %ANT_HOME%\bin 在cmd中输入ant -v...
maven 整合solr4.2环境,另外整合了solr-data-import源码环境,资源10分,十分不贵!有需要的朋友请下载吧。花了我3个小时的时间整理的。下载后,使用maven导入即可使用,升级solr版本也比较方便。本环境使用了...
Solr源码在MyEclipse下的搭建 1. 下载并安装Ant 下载地址: http://ant.apache.org/bindownload.cgi Ant环境变量配置: ANT_HOME: E:\Program Files\apache-ant-1.9.0 Path: %ANT_HOME%\bin 在cmd中输入ant -v...
`saveProduct`方法用于将产品对象保存到Solr索引,`searchByKeyword`方法使用Solr的查询能力找到包含指定关键词的产品。 在`SpringBoot-Solr-demo`的源码中,你可能会看到一个完整的Spring Boot应用,包含了上述...
毕业设计基于Springboot的个人博客系统源码,集成solr服务器毕业设计基于Springboot的个人博客系统源码,集成solr服务器毕业设计基于Springboot的个人博客系统源码,集成solr服务器毕业设计基于Springboot的个人博客...
在更换缓存机制时,这些类可能会参与缓存的生成和查找过程,但具体实现取决于Solr的缓存接口和实现。 总之,通过集成Memcached,Solr能够利用更强大的分布式缓存能力,提升搜索性能,尤其是在高并发和大数据量场景...
Solr 依存于Lucene,因为Solr底层的核心技术是使用Lucene 来实现的,Solr和Lucene的本质区别有以下三点:搜索服务器,企业级和管理。Lucene本质上是搜索库,不是独立的应用程序,而Solr是。Lucene专注于搜索底层的...
Solr是Apache Lucene项目下的一个企业级搜索服务器,它提供了全文检索、命中高亮、 faceted search(分面搜索)等多种功能...通过深入理解DIH的工作原理和配置方法,我们可以更好地利用这一工具来优化我们的Solr部署。