论坛首页 Java企业应用论坛

庖丁解牛的Lucene2.4全文搜索代码

浏览 2473 次
精华帖 (0) :: 良好帖 (0) :: 新手帖 (0) :: 隐藏帖 (0)
作者 正文
   发表时间:2009-04-24   最后修改:2009-11-26
package org.heming.sucene.test;

import java.io.IOException;
import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocCollector;
import org.apache.lucene.store.LockObtainFailedException;

/**
 * Builds and queries a Lucene 2.4 index over the rows of the
 * {@code T_2SYSMENU_INF} table.
 *
 * <p>Each row becomes one document with the stored, non-analyzed fields
 * {@code T2SMI001}, {@code T2SMI002} and {@code T2SMI004}. Column
 * {@code T2SMI003} is selected by the rebuild query but is not indexed.
 *
 * <p>All index writes performed by this class are serialized through a single
 * private lock; searches do not take that lock.
 */
public class LucenePaoDing {

 private static final String indexPath = "f/何明/heming/le";

 /** Name of the field whose value is returned (parsed as a long) by searches. */
 private static final String ID_FIELD = "T2SMI001";

 /** Number of added documents after which a full rebuild commits. */
 private static final int COMMIT_BATCH_SIZE = 1000;

 /** Maximum number of hits collected by a single search. */
 private static final int MAX_HITS = 10;

 /**
  * Guards every index write. A dedicated object is used instead of the
  * {@code indexPath} literal because string literals are interned and could
  * be locked by unrelated code.
  */
 private static final Object INDEX_LOCK = new Object();

 /**
  * Rebuilds the whole index from the database, then runs a sample search
  * for the keyword {@code "3"} and prints the matching ids.
  *
  * @param args ignored
  */
 public static void main(String[] args) {
  rebuildAll();
  String keyword = "3";
  LucenePaoDing lucene = new LucenePaoDing();
  System.out.println("索引搜索\n-----------------------------------------");
  System.out.println(lucene.searcherIndex(keyword));
 }

 /** Returns the analyzer used both when writing the index and when parsing queries. */
 private Analyzer getAnalyzer() {
  return new StandardAnalyzer();
 }

 /**
  * Recreates the index from every row of {@code T_2SYSMENU_INF}.
  *
  * <p>The result set is handed to {@link #Index(ResultSet)} with its cursor
  * still before the first row, so no row is skipped. An empty table yields
  * an empty index. Failures are printed to standard error and not rethrown.
  */
 public static void rebuildAll() {
  synchronized (INDEX_LOCK) {
   LucenePaoDing lucene = new LucenePaoDing();
   Connection con = null;
   Statement stt = null;
   ResultSet rs = null;
   String sql = "select T2SMI001,T2SMI002,T2SMI003,T2SMI004 from T_2SYSMENU_INF";
   try {
    con = DBConnection.getConnection();
    stt = con.createStatement();
    rs = stt.executeQuery(sql);
    System.out.println(sql);
    // Do not call rs.next() here: Index() advances the cursor itself.
    lucene.Index(rs);
   } catch (Exception e) {
    e.printStackTrace();
   } finally {
    closeJdbc(rs, stt, con);
   }
  }
 }

 /**
  * Replaces the index content with one document per remaining row of
  * {@code rs}.
  *
  * <p>The caller must hold {@code INDEX_LOCK} and must pass a result set
  * positioned before the first row to index. The writer is closed on every
  * path so that the index write lock is released even after a failure.
  *
  * @param rs rows providing the columns {@code T2SMI001}, {@code T2SMI002}
  *           and {@code T2SMI004}
  */
 private void Index(ResultSet rs) {
  IndexWriter writer = null;
  try {
   // create == true: any existing index at indexPath is overwritten.
   writer = new IndexWriter(indexPath, getAnalyzer(),
     true, IndexWriter.MaxFieldLength.UNLIMITED);
   long start = System.currentTimeMillis();
   int pending = 0;
   while (rs.next()) {
    writer.addDocument(buildDocument(rs.getString("T2SMI001"),
      rs.getString("T2SMI002"), rs.getString("T2SMI004")));
    if (++pending == COMMIT_BATCH_SIZE) {
     writer.commit();
     pending = 0;
    }
   }
   writer.commit();
   writer.optimize();
   // Closing flushes buffered data to the index files.
   writer.close();
   writer = null;
   long end = System.currentTimeMillis();
   System.out.println("重建索引成功!!!" + "用时" + (end - start) + "毫秒");
  } catch (Exception e) {
   e.printStackTrace();
  } finally {
   closeWriter(writer);
  }
 }

 /**
  * Appends a single document to the existing index.
  *
  * <p>The index must already exist (the writer is opened with
  * {@code create == false}). I/O failures are printed to standard error and
  * not rethrown. A {@code null} string argument is simply not indexed.
  *
  * @param T2SMI001 value stored in field {@code T2SMI001}; searches return it
  * @param T2SMI002 value stored in field {@code T2SMI002}, may be {@code null}
  * @param T2SMI004 value stored in field {@code T2SMI004}, may be {@code null}
  */
 public void IndexSigle(long T2SMI001, String T2SMI002, String T2SMI004) {
  synchronized (INDEX_LOCK) {
   IndexWriter writer = null;
   try {
    writer = new IndexWriter(indexPath, getAnalyzer(), false,
      IndexWriter.MaxFieldLength.UNLIMITED);
    writer.addDocument(buildDocument(Long.toString(T2SMI001), T2SMI002, T2SMI004));
   } catch (IOException e) {
    // Also covers CorruptIndexException and LockObtainFailedException.
    e.printStackTrace();
   } finally {
    closeWriter(writer);
   }
  }
 }

 /**
  * Searches fields {@code T2SMI002} and {@code T2SMI004} for
  * {@code queryString}; a document matches when either field matches.
  *
  * <p>NOTE(review): the fields are indexed with {@code NOT_ANALYZED} while
  * the query text goes through {@link #getAnalyzer()}; presumably only
  * whole-value matches that survive the analyzer's normalization can hit.
  * Confirm this is intended.
  *
  * @param queryString query in Lucene query-parser syntax
  * @return the distinct {@code T2SMI001} values of at most the top 10 hits,
  *         in ranking order; {@code null} if the search failed (the failure
  *         is printed to standard error)
  */
 public List<Long> searcherIndex(String queryString) {
  IndexSearcher searcher = null;
  try {
   searcher = new IndexSearcher(indexPath);
   // SHOULD on both fields: a hit in either one is enough.
   BooleanClause.Occur[] clauses = { BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD };
   TopDocCollector collector = new TopDocCollector(MAX_HITS);
   Query query = MultiFieldQueryParser.parse(queryString,
     new String[] { "T2SMI002", "T2SMI004" }, clauses, getAnalyzer());
   searcher.search(query, collector);
   ScoreDoc[] hits = collector.topDocs().scoreDocs;
   List<Long> list = new ArrayList<Long>();
   for (int i = 0; i < hits.length; i++) {
    // Load the stored document of the hit; a fresh Document has no fields.
    Document doc = searcher.doc(hits[i].doc);
    String rawId = doc.get(ID_FIELD);
    if (rawId == null) {
     continue;
    }
    Long id = Long.valueOf(rawId.trim());
    if (!list.contains(id)) {
     list.add(id);
    }
   }
   return list;
  } catch (Exception e) {
   e.printStackTrace();
   return null;
  } finally {
   if (searcher != null) {
    try {
     searcher.close();
    } catch (IOException e) {
     e.printStackTrace();
    }
   }
  }
 }

 /** Creates the document for one row; {@code null} values are left out. */
 private static Document buildDocument(String id, String value002, String value004) {
  Document doc = new Document();
  addStoredKeyword(doc, ID_FIELD, id);
  addStoredKeyword(doc, "T2SMI002", value002);
  addStoredKeyword(doc, "T2SMI004", value004);
  return doc;
 }

 /**
  * Adds a stored, non-analyzed field unless {@code value} is {@code null}
  * (the {@code Field} constructor rejects null values).
  */
 private static void addStoredKeyword(Document doc, String name, String value) {
  if (value != null) {
   doc.add(new Field(name, value, Field.Store.YES, Field.Index.NOT_ANALYZED));
  }
 }

 /** Closes {@code writer} if it is non-null, printing any I/O failure. */
 private static void closeWriter(IndexWriter writer) {
  if (writer == null) {
   return;
  }
  try {
   writer.close();
  } catch (IOException e) {
   e.printStackTrace();
  }
 }

 /** Closes the given JDBC resources in reverse order of acquisition; nulls are skipped. */
 private static void closeJdbc(ResultSet rs, Statement stt, Connection con) {
  if (rs != null) {
   try {
    rs.close();
   } catch (SQLException e) {
    e.printStackTrace();
   }
  }
  if (stt != null) {
   try {
    stt.close();
   } catch (SQLException e) {
    e.printStackTrace();
   }
  }
  if (con != null) {
   try {
    con.close();
   } catch (SQLException e) {
    e.printStackTrace();
   }
  }
 }

}


论坛首页 Java企业应用版

跳转论坛:
Global site tag (gtag.js) - Google Analytics