DefaultSkipListReader查找docId

chengqianl

浏览: 53757 次
性别:
来自: 杭州

最近访客更多访客>>

ForLove_ForYOU

阿祥哥

dj78337323

donchiang709

博主相关

博客

微博

相册

留言

关于我

文章分类

社区版块

存档分类

博客分类：

lucene

Go UP Cache

DefaultSkipListReader查找docId
MultiLevelSkipListReader
public MultiLevelSkipListReader(IndexInput skipStream, int maxSkipLevels, int skipInterval) {
/**每个层的文件文件的IndexInput读取对象，是通过定位到每个文件的相对应的skiptable的位置层的位置，clone下就得到新的IndexInput 对象了
**/
this.skipStream = new IndexInput[maxSkipLevels];
/**
Skiplist每次的
**/
this.skipPointer = new long[maxSkipLevels];
//
this.childPointer = new long[maxSkipLevels];
// 当前层相对原始层跳过的元素个数
this.numSkipped = new int[maxSkipLevels];
// 跳表有多少层
this.maxNumberOfSkipLevels = maxSkipLevels;
// 每层相对于原始层跳表的间隔的元素个数
this.skipInterval = new int[maxSkipLevels];

    this.skipStream [0]= skipStream;
    this.inputIsBuffered = (skipStream instanceof BufferedIndexInput);
this.skipInterval[0] = skipInterval;
// 由于skipInterval 是已知的，所以每层的间隔就可以计算出来
    for (int i = 1; i < maxSkipLevels; i++) {
      // cache skip intervals
      this.skipInterval[i] = this.skipInterval[i - 1] * skipInterval;
}
//记录当前层的docId的
    skipDoc = new int[maxSkipLevels];
}

skipTo(int target)
扫描 skip list，定位到 docId 不小于 target 的那个 skipdata 的前一项，并返回到该项为止已经跳过的文档数目（即 numSkipped[0] - skipInterval[0] - 1）。
/**
 * Moves the skip list forward towards {@code target}.
 *
 * <p>On the first call the skip levels are loaded. The method then climbs to the highest
 * level whose current docId is still below {@code target}, and from there works its way
 * back down to level 0, loading skip entries on each level until that level's docId is
 * no longer smaller than {@code target}.
 *
 * @param target the docId to skip towards
 * @return {@code numSkipped[0] - skipInterval[0] - 1}; presumably the count of documents
 *         lying before the last level-0 skip entry that precedes {@code target} -
 *         TODO confirm against the caller
 * @throws IOException if reading from one of the skip streams fails
 */
int skipTo(int target) throws IOException {
    if (!haveSkipped) {
      // first time, load skip levels
      loadSkipLevels();
      haveSkipped = true;
    }

    // skipDoc[level] is the docId currently reached on that level. Starting from the
    // lowest level, compare target with the levels above for as long as target is
    // greater than the docId of the next higher level.
    // walk up the levels until highest level is found that has a skip
    // for this target
    int level = 0;
    while (level < numberOfSkipLevels - 1 && target > skipDoc[level + 1]) {
      level++;
    }
    // search: work back down from the level found above to level 0
    while (level >= 0) {
      if (target > skipDoc[level]) {
        // target is still beyond this level's docId: read the next skip entry of the
        // level. When loadNextSkip() reports the level as exhausted it has set
        // skipDoc[level] to Integer.MAX_VALUE, so the next iteration takes the else
        // branch and descends.
        if (!loadNextSkip(level)) {
          continue;
        }
      } else {
        // no more skips on this level, go down one level
        // only seek the child stream when lastChildPointer lies ahead of its position
        if (level > 0 && lastChildPointer > skipStream[level - 1].getFilePointer()) {
          seekChild(level - 1);
        }
        level--;
      }
    }
    // numSkipped[0] already includes the interval of the entry that stopped the search,
    // so one interval (and one more document) is subtracted again.
    return numSkipped[0] - skipInterval[0] - 1;
}

loadSkipLevels()
加载各层（level）的信息：计算跳表的层数，并为每一层准备好读取用的 IndexInput。
/**
 * Loads the skip levels.
 *
 * <p>Computes how many levels the skip list has and sets up one input per level. The
 * levels are read from the highest down to level 1; each of them is preceded by a VLong
 * holding its length. Level 0 keeps using the base stream.
 *
 * @throws IOException if seeking or reading the base stream fails
 */
private void loadSkipLevels() throws IOException {

    // The number of levels follows from the number of documents containing the term:
    // floor(log(docCount) / log(skipInterval[0])), 0 for an empty list, and never more
    // than maxNumberOfSkipLevels.
    // NOTE(review): the ratio of two Math.log() values is subject to floating-point
    // rounding; presumably the code that wrote the skip data uses the same formula -
    // confirm before changing it.

    numberOfSkipLevels = docCount == 0 ? 0 : (int) Math.floor(Math.log(docCount) / Math.log(skipInterval[0]));
    if (numberOfSkipLevels > maxNumberOfSkipLevels) {
      numberOfSkipLevels = maxNumberOfSkipLevels;
}

    // Seek to skipPointer[0]; the accompanying article describes this as the start of
    // the skip data inside the .frq file.

    skipStream[0].seek(skipPointer[0]);

    // number of levels that are still to be read into memory
int toBuffer = numberOfLevelsToBuffer;
    // highest level first; level 0 is handled after the loop
for (int i = numberOfSkipLevels - 1; i > 0; i--) {
      // the length of the current level
      long length = skipStream[0].readVLong();
      // the start pointer of the current level
      skipPointer[i] = skipStream[0].getFilePointer();
      if (toBuffer > 0) {
        // buffer this level: the level's data is read into memory
        // presumably SkipBuffer consumes 'length' bytes and thereby leaves the base
        // stream at the start of the next level - TODO confirm
        skipStream[i] = new SkipBuffer(skipStream[0], (int) length);
        toBuffer--;
      } else {
        // clone this stream, it is already at the start of the current level;
        // every level reads through its own clone
        skipStream[i] = (IndexInput) skipStream[0].clone();
        // a level shorter than the default buffer gets a buffer of just its own length
        if (inputIsBuffered && length < BufferedIndexInput.BUFFER_SIZE) {
          ((BufferedIndexInput) skipStream[i]).setBufferSize((int) length);
        }
        // move base stream beyond the current level, i.e. to the start of the next one
        skipStream[0].seek(skipStream[0].getFilePointer() + length);
      }
    }

    // use base stream for the lowest level
    skipPointer[0] = skipStream[0].getFilePointer();
}

loadNextSkip
/**
 * Reads the next skip entry of the given level.
 *
 * @param level the level to advance
 * @return {@code true} if an entry was read; {@code false} if the level is exhausted, in
 *         which case {@code skipDoc[level]} is set to {@code Integer.MAX_VALUE} and
 *         {@code numberOfSkipLevels} is lowered to {@code level} if it was higher
 * @throws IOException if reading from the level's stream fails
 */
private boolean loadNextSkip(int level) throws IOException {
    // we have to skip, the target document is greater than the current
    // skip list entry
    // presumably remembers the docId and child position of the entry being left behind -
    // TODO confirm against setLastSkipData()
    setLastSkipData(level);
    // Count the skipped elements relative to the base data, not to the level below.
    // Example with an interval of 16: one entry on level 0 skips 16 elements, one entry
    // on level 1 skips 16 level-0 entries, i.e. 16 * 16 elements of the base data.
    numSkipped[level] += skipInterval[level];
    // has this level skipped past the number of documents in the list?
    if (numSkipped[level] > docCount) {
      // this skip list is exhausted
      skipDoc[level] = Integer.MAX_VALUE;
      if (numberOfSkipLevels > level) numberOfSkipLevels = level;
      return false;
    }
    // The value read is the difference to the previous entry's docId, so it is added to
    // skipDoc[level], which holds the docId this level has currently reached.
    // read next skip entry
    skipDoc[level] += readSkipData(level, skipStream[level]);

if (level != 0) {
      // The stored child pointer is relative to the start of the level below
      // (skipPointer[level - 1]); adding that start gives this entry's position there.
      // read the child pointer if we are not on the leaf level
      childPointer[level] = skipStream[level].readVLong() + skipPointer[level - 1];
    }

    return true;

}

readSkipData
/**
 * Reads one skip entry of the given level from {@code skipStream}.
 *
 * <p>Besides returning the doc delta, the call adds the two VInts that follow to
 * {@code freqPointer[level]} and {@code proxPointer[level]}. According to the article
 * these are the offsets of the corresponding posting in the .frq and .prx files.
 *
 * @param level      the level the entry belongs to
 * @param skipStream the input positioned at the entry
 * @return the docId delta stored in the entry
 * @throws IOException if reading from {@code skipStream} fails
 */
protected int readSkipData(int level, IndexInput skipStream) throws IOException {
  // Every entry starts with a VInt that carries the delta-encoded docId.
  final int code = skipStream.readVInt();

  final int docDelta;
  if (!currentFieldStoresPayloads) {
    // Without payloads the VInt is the delta itself.
    docDelta = code;
  } else {
    // With payloads the low bit is a flag: when it is set, the payload length differs
    // from the previous one and is stored right after the code.
    if ((code & 1) == 1) {
      payloadLength[level] = skipStream.readVInt();
    }
    docDelta = code >>> 1;
  }

  // Both pointers are stored as deltas and accumulated per level.
  freqPointer[level] += skipStream.readVInt();
  proxPointer[level] += skipStream.readVInt();

  return docDelta;
}

next()
/**
 * Advances to the next document that is not flagged in {@code deletedDocs}.
 *
 * <p>Updates {@code doc}, {@code freq} and {@code count} for every posting it decodes.
 *
 * @return {@code true} if positioned on a document; {@code false} once {@code count}
 *         has reached {@code df}
 * @throws IOException if reading from {@code freqStream} fails
 */
public boolean next() throws IOException {
  while (count != df) {
    // read the code of the next posting
    final int docCode = freqStream.readVInt();

    if (currentFieldOmitTermFreqAndPositions) {
      // no frequencies are stored: the code is the plain docId delta
      doc += docCode;
      freq = 1;
    } else {
      // "DocDelta[, Freq?]" encoding: shifting the code right by one bit yields the
      // delta to the previous docId. A set low bit means the frequency is 1 and no
      // separate value follows; otherwise the frequency is read next.
      doc += docCode >>> 1;
      freq = ((docCode & 1) != 0) ? 1 : freqStream.readVInt();
    }

    count++;

    // stop on the first document that is not marked in deletedDocs
    final boolean deleted = deletedDocs != null && deletedDocs.get(doc);
    if (!deleted) {
      return true;
    }
    skippingDoc();
  }
  return false;
}

图1

图2