浏览 1960 次
精华帖 (0) :: 良好帖 (0) :: 新手帖 (0) :: 隐藏帖 (0)
|
|
---|---|
作者 | 正文 |
发表时间:2013-05-12
public interface Weight extends java.io.Serializable { /** The query that this concerns. */ Query getQuery(); /** The weight for this query. */ float getValue(); /** The sum of squared weights of contained query clauses. */ float sumOfSquaredWeights() throws IOException; /** Assigns the query normalization factor to this. */ void normalize(float norm); /** Constructs a scorer for this. */ Scorer scorer(IndexReader reader) throws IOException; /** An explanation of the score computation for the named document. */ Explanation explain(IndexReader reader, int doc) throws IOException; } 这是weight接口的所有方法。 我们主要使用的是scorer方法,计算最后的得分。 public Scorer scorer(IndexReader reader) throws IOException { BooleanScorer2 result = new BooleanScorer2(similarity, minNrShouldMatch, allowDocsOutOfOrder); for (int i = 0 ; i < weights.size(); i++) { BooleanClause c = (BooleanClause)clauses.get(i); Weight w = (Weight)weights.get(i); Scorer subScorer = w.scorer(reader); if (subScorer != null) result.add(subScorer, c.isRequired(), c.isProhibited()); else if (c.isRequired()) return null; } return result; } 这里需要weights,看看weights是怎么来的: protected ArrayList weights = new ArrayList(); public BooleanWeight(Searcher searcher) throws IOException { this.similarity = getSimilarity(searcher); for (int i = 0 ; i < clauses.size(); i++) { BooleanClause c = (BooleanClause)clauses.get(i); weights.add(c.getQuery().createWeight(searcher)); } } 其实这个就是依赖query的createWeight方法。 看TermQuery的 public TermWeight(Searcher searcher) throws IOException { this.similarity = getSimilarity(searcher); idf = similarity.idf(term, searcher); // compute idf } 可以看到idf 是在这一步算出来的。 TermScorer(Weight weight, TermDocs td, Similarity similarity, byte[] norms) { super(similarity); this.weight = weight; this.termDocs = td; this.norms = norms; this.weightValue = weight.getValue(); for (int i = 0; i < SCORE_CACHE_SIZE; i++) scoreCache[i] = getSimilarity().tf(i) * weightValue; } 得分的计算我们需要weight的value。weight.getValue() value值是怎么来的呢: public void 
normalize(float queryNorm) { this.queryNorm = queryNorm; queryWeight *= queryNorm; // normalize query weight value = queryWeight * idf; // idf for document } 可以看到value是由queryWeight、queryNorm和idf共同确定的(value = queryWeight × queryNorm × idf)。 public float sumOfSquaredWeights() { queryWeight = idf * getBoost(); // compute query weight return queryWeight * queryWeight; // square it } queryWeight 就是idf和boost的乘积。 float norm = getSimilarity(searcher).queryNorm(sum); weight.normalize(norm); queryNorm的值由Similarity的queryNorm方法确定。 public float queryNorm(float sumOfSquaredWeights) { return (float)(1.0 / Math.sqrt(sumOfSquaredWeights)); } public float sumOfSquaredWeights() { queryWeight = idf * getBoost(); // compute query weight return queryWeight * queryWeight; // square it } 说白了,queryNorm是由queryWeight确定的,但是这里还多了query的boost,而queryWeight里只包含某一个term的boost。 BooleanQuery: public float sumOfSquaredWeights() throws IOException { float sum = 0.0f; for (int i = 0 ; i < weights.size(); i++) { BooleanClause c = (BooleanClause)clauses.get(i); Weight w = (Weight)weights.get(i); // call sumOfSquaredWeights for all clauses in case of side effects float s = w.sumOfSquaredWeights(); // sum sub weights if (!c.isProhibited()) // only add to sum for non-prohibited clauses sum += s; } sum *= getBoost() * getBoost(); // boost each sub-weight return sum ; } 注意 BooleanQuery的boost是整个query的boost。 到这里,计算得分所需的变量都确定了,其实只有三个变量:idf、term的boost、query的boost。所以我们一般做扩展,也就是改这三个变量的值。 声明:ITeye文章版权属于作者,受法律保护。没有作者书面许可不得转载。
推荐链接
|
|
返回顶楼 | |