庖丁解牛的Lucene 2.4的全文搜索代码 -

zfsn

浏览: 348469 次
性别:
来自: 北京

最近访客更多访客>>

ks2144634

testjiang3

livizy

liuxiao723846

博主相关

博客

微博

相册

留言

关于我

文章分类

社区版块

存档分类

庖丁解牛的Lucene 2.4的全文搜索代码

博客分类：

Lucene

lucene Apache SQL Spring .net

package com.laozizhu.article.util;

import java.io.IOException;
import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import javax.sql.DataSource;
import net.paoding.analysis.analyzer.PaodingAnalyzer;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocCollector;

/**
 * Full-text indexing and search built on Lucene 2.4 with the Paoding Chinese analyzer.
 *
 * <p>Articles are read from the {@code t_article} table and indexed into the directory
 * named by {@link #indexPath}. All index writes performed by this class are serialized
 * through a single private lock.
 *
 * @author 老紫竹研究室(laozizhu.com)
 */
public class LucenePaoDing {
  private static final String indexPath = "d:/indexpaoding/www.laozizhu.com";

  /**
   * Guards every index write done by this class. A dedicated object is used instead of the
   * path string because string literals are interned and could be locked by unrelated code.
   */
  private static final Object INDEX_LOCK = new Object();

  /** Number of added documents after which an intermediate commit is issued. */
  private static final int COMMIT_BATCH_SIZE = 1000;

  /**
   * Rebuilds the whole index and then runs a sample search, printing the matching ids.
   *
   * @param args unused
   * @throws Exception declared for convenience; the called methods handle their own errors
   */
  public static void main(String[] args) throws Exception {
    rebuildAll();
    String keyword = "Spring.jar";
    LucenePaoDing l = new LucenePaoDing();
    System.out.println("索引搜索\n------------------------------");
    System.out.println(l.seacherIndex(keyword));
  }

  /**
   * Recreates the index from every row of {@code t_article}.
   *
   * <p>The JDBC resources are always released; failures are printed and not rethrown.
   */
  public static void rebuildAll() {
    synchronized (INDEX_LOCK) {
      LucenePaoDing l = new LucenePaoDing();
      DataSource ds = (DataSource) Factory.getBean("dataSource");
      Connection con = null;
      Statement stat = null;
      ResultSet rs = null;
      try {
        con = ds.getConnection();
        stat = con.createStatement();
        rs = stat.executeQuery("select id,subject,content from t_article");
        if (rs != null) {
          l.Index(rs);
        }
      } catch (Exception ex) {
        ex.printStackTrace();
      } finally {
        // Best-effort cleanup: a failure to close one resource must not prevent
        // closing the others, so each close is isolated.
        if (rs != null) {
          try {
            rs.close();
          } catch (Exception ignored) {
            // nothing useful can be done here
          }
        }
        if (stat != null) {
          try {
            stat.close();
          } catch (Exception ignored) {
            // nothing useful can be done here
          }
        }
        if (con != null) {
          try {
            con.close();
          } catch (Exception ignored) {
            // nothing useful can be done here
          }
        }
      }
    }
  }

  /**
   * Creates the analyzer used for both indexing and query parsing.
   *
   * @return a new {@link PaodingAnalyzer}
   */
  public synchronized Analyzer getAnalyzer() {
    return new PaodingAnalyzer();
  }

  /**
   * Rebuilds the index from the given result set, replacing any existing index.
   *
   * <p>Expects the columns {@code id}, {@code subject} and {@code content}. The caller is
   * expected to hold {@link #INDEX_LOCK}. Errors are printed and not rethrown.
   *
   * @param rs rows to index; it is iterated but not closed here
   */
  private void Index(ResultSet rs) {
    try {
      Date start = new Date();
      // 'true' creates a fresh index, discarding whatever was at indexPath before.
      IndexWriter writer = new IndexWriter(indexPath, getAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED);
      try {
        writer.setMaxFieldLength(10000000);
        int pending = 0;
        while (rs.next()) {
          // One document corresponds to one table row.
          Document doc = new Document();
          // "id" carries the database primary key and is stored verbatim (not analyzed).
          doc.add(new Field("id", nullToEmpty(rs.getString("id")), Field.Store.YES, Field.Index.NOT_ANALYZED));
          doc.add(new Field("subject", nullToEmpty(rs.getString("subject")), Field.Store.YES, Field.Index.ANALYZED));
          doc.add(new Field("content", nullToEmpty(rs.getString("content")), Field.Store.YES, Field.Index.ANALYZED));
          writer.addDocument(doc);
          // Commit in fixed-size batches so progress is persisted periodically.
          if (++pending == COMMIT_BATCH_SIZE) {
            writer.commit();
            pending = 0;
          }
        }
        writer.commit();
        writer.optimize();
      } finally {
        // Always close: this flushes buffered documents and releases the index write lock,
        // even when reading rows or adding a document failed.
        writer.close();
      }
      Date end = new Date();
      System.out.println("重建索引成功！！！！" + "用时" + (end.getTime() - start.getTime()) + "毫秒");
    } catch (IOException e) {
      e.printStackTrace();
    } catch (SQLException e) {
      e.printStackTrace();
    }
  }

  /**
   * Appends a single article to the existing index.
   *
   * <p>Errors are printed and not rethrown.
   *
   * @param id database id of the article
   * @param subject article title; {@code null} is indexed as an empty string
   * @param content article body; {@code null} is indexed as an empty string
   */
  public void IndexSingle(long id, String subject, String content) {
    synchronized (INDEX_LOCK) {
      try {
        Date start = new Date();
        // 'false' opens the existing index for appending instead of recreating it.
        IndexWriter writer = new IndexWriter(indexPath, getAnalyzer(), false, IndexWriter.MaxFieldLength.UNLIMITED);
        try {
          writer.setMaxFieldLength(10000000);
          // One document corresponds to one table row.
          Document doc = new Document();
          // "id" carries the database primary key and is stored verbatim (not analyzed).
          doc.add(new Field("id", Long.toString(id), Field.Store.YES, Field.Index.NOT_ANALYZED));
          doc.add(new Field("subject", nullToEmpty(subject), Field.Store.YES, Field.Index.ANALYZED));
          doc.add(new Field("content", nullToEmpty(content), Field.Store.YES, Field.Index.ANALYZED));
          writer.addDocument(doc);
        } finally {
          // Always close: this flushes the document and releases the index write lock.
          writer.close();
        }
        Date end = new Date();
        System.out.println("索引建立成功！！！！" + "用时" + (end.getTime() - start.getTime()) + "毫秒");
      } catch (IOException e) {
        e.printStackTrace();
      }
    }
  }

  /**
   * Main search entry point: looks for the query in the {@code subject} and {@code content}
   * fields and returns the ids of the top 10 hits, best match first, without duplicates.
   *
   * @param queryString query in Lucene query syntax
   * @return ids of the matching articles, or {@code null} if the search failed
   */
  public List<Long> seacherIndex(String queryString) {
    IndexSearcher isearcher = null;
    try {
      isearcher = new IndexSearcher(indexPath);
      // Search both fields; a match in either one is sufficient.
      BooleanClause.Occur[] clauses = { BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD };
      TopDocCollector collector = new TopDocCollector(10);
      Query query = MultiFieldQueryParser.parse(queryString, new String[] { "subject", "content" }, clauses, getAnalyzer());
      isearcher.search(query, collector);
      ScoreDoc[] hits = collector.topDocs().scoreDocs;
      List<Long> rtn = new ArrayList<Long>();
      for (int i = 0; i < hits.length; i++) {
        Document doc = isearcher.doc(hits[i].doc);
        Long id = Long.valueOf(doc.get("id").trim());
        // The same article may have been indexed more than once; report it only once.
        if (!rtn.contains(id)) {
          rtn.add(id);
        }
      }
      return rtn;
    } catch (Exception e) {
      e.printStackTrace();
      return null;
    } finally {
      // Release the searcher's index files on every path, including failures.
      if (isearcher != null) {
        try {
          isearcher.close();
        } catch (IOException ignored) {
          // the result (or the failure) has already been determined
        }
      }
    }
  }

  /**
   * Maps {@code null} to the empty string so that a missing column value does not make the
   * {@link Field} constructor fail.
   */
  private static String nullToEmpty(String value) {
    return value == null ? "" : value;
  }
}

分享到：

Lucene 2.4更新索引的方法(Update Index) | Lucene 搜索方式

2009-11-07 16:30
浏览 1402
评论(0)
查看更多

发表评论

您还没有登录,请您登录后再发表评论

最近访客更多访客>>

博主相关

文章分类

社区版块

存档分类

最新评论

庖丁解牛的Lucene 2.4的全文搜索代码

评论

发表评论

相关推荐

最近访客 更多访客>>

博主相关

文章分类

社区版块

存档分类

最新评论

庖丁解牛的Lucene 2.4的全文搜索代码

评论

发表评论

相关推荐

布隆过滤器（Bloom Filter）之java实例

Lucene查询语法详解

使用Lucene的Highlighter实现文件摘要的自动提取

ICTCLAS 中科院分词系统 代码 注释 中文分词 词性标注

Lucene日期排序及组合查询

Lucene中自定义排序的实现

在Lucene中应用poading进行分词

用Lucene实现摘要的高亮点

Lucene日期索引搜索

Lucene 中文引擎，庖丁解牛的辞典参数配置方法

Lucene 2.4更新索引的方法(Update Index)

Lucene 搜索方式

转一篇lucene的使用的文章，写的比较全

最近访客更多访客>>

ICTCLAS 中科院分词系统 代码 注释 中文分词 词性标注