lucene最新版本3.3的基本功能用法（IK分词是3.2.8）

aiyan3344

浏览: 206129 次
性别:
来自: 北京

最近访客更多访客>>

康敏栋

toly815

star77266989

zhaohuaxishiwzw

博主相关

博客

微博

相册

留言

关于我

文章分类

社区版块

存档分类

博客分类：

lucene

lucene

1.创建索引和查询

package dataFromOracle.dao;

import java.io.File;
import java.io.IOException;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.List;


import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Formatter;
import org.apache.lucene.search.highlight.Fragmenter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.Scorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;
import org.wltea.analyzer.lucene.IKQueryParser;
import org.wltea.analyzer.lucene.IKSimilarity;

import dataFromOracle.conn.Conn;
import dataFromOracle.entity.SearchResult;

/**
 * Builds and queries Lucene 3.3 indexes using the IK analyzer.
 *
 * <p>Each indexed row becomes a document with the stored fields {@code id},
 * {@code title}, {@code content}, {@code author} and {@code time}. Searches
 * run against {@code title} and {@code content} and return highlighted
 * fragments wrapped in {@code <span class="highlighter">}.
 */
public class TestLucene {

	/** Fields the multi-field query is parsed against. */
	private static final String[] SEARCH_FIELDS = {"title", "content"};

	/** Maximum number of hits fetched by a single search. */
	private static final int MAX_HITS = 100;

	/** Size passed to {@link SimpleFragmenter} for highlighted fragments. */
	private static final int FRAGMENT_SIZE = 100;

	/**
	 * Runs a query and returns its open result set.
	 *
	 * <p>The caller owns the returned {@link ResultSet} (and, through
	 * {@link ResultSet#getStatement()}, the statement that produced it) and is
	 * responsible for closing it. The statement is closed here only when the
	 * query itself fails.
	 *
	 * @param sql the SQL text to execute
	 * @return the open result set, or {@code null} if the query failed
	 */
	public ResultSet getResult(String sql) {
		Statement stmt = null;
		try {
			stmt = Conn.getConnection().createStatement();
			return stmt.executeQuery(sql);
		} catch (SQLException e) {
			System.out.println(e);
			// No result set was handed out, so nobody else can close the statement.
			if (stmt != null) {
				try {
					stmt.close();
				} catch (SQLException closeError) {
					System.out.println(closeError);
				}
			}
		}
		return null;
	}

	/**
	 * Rebuilds the index in {@code indexDir} from the rows of {@code rs}.
	 *
	 * <p>The writer is opened in {@code CREATE} mode, so a successful run
	 * replaces whatever index was in the directory. If anything fails part-way
	 * the pending changes are rolled back instead of committed, and the writer
	 * (with its lock) is released. The result set is read but not closed.
	 *
	 * @param rs       rows to index; columns 1-5 are read as id, title,
	 *                 content, author and time. A {@code null} value (what
	 *                 {@link #getResult(String)} returns on failure) is
	 *                 reported and skipped without touching the index.
	 * @param indexDir directory that holds the index
	 */
	public void Index(ResultSet rs, File indexDir) {
		if (rs == null) {
			System.out.println("Index skipped, no result set for " + indexDir);
			return;
		}
		IndexWriter writer = null;
		boolean committed = false;
		try {
			Analyzer analyzer = new IKAnalyzer();
			IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_33,
					analyzer);
			conf.setOpenMode(OpenMode.CREATE);
			writer = new IndexWriter(FSDirectory.open(indexDir), conf);
			while (rs.next()) {
				writer.addDocument(toDocument(rs));
			}
			writer.optimize();
			writer.close();
			committed = true;
		} catch (IOException e) {
			System.out.println(e);
		} catch (SQLException e) {
			System.out.println(e);
		} finally {
			if (writer != null && !committed) {
				// rollback() discards the uncommitted changes and closes the writer.
				try {
					writer.rollback();
				} catch (IOException e) {
					System.out.println(e);
				}
			}
		}
	}

	/** Converts the current row of {@code rs} into a Lucene document. */
	private Document toDocument(ResultSet rs) throws SQLException {
		Document doc = new Document();
		doc.add(textField("id", rs.getString(1)));
		doc.add(textField("title", rs.getString(2)));
		doc.add(textField("content", rs.getString(3)));
		doc.add(textField("author", rs.getString(4)));
		doc.add(textField("time", rs.getString(5)));
		return doc;
	}

	/**
	 * Creates a stored, analyzed field with positions and offsets.
	 * A {@code null} column value is indexed as an empty string because the
	 * {@link Field} constructor rejects {@code null} values.
	 */
	private static Field textField(String name, String value) {
		return new Field(name, value == null ? "" : value, Field.Store.YES,
				Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
	}

	/**
	 * Searches the index in {@code searchdictory} for {@code queryString}.
	 *
	 * <p>The query is parsed against {@code title} and {@code content}; at
	 * most 100 hits are returned. Title and content carry the highlighted best
	 * fragment when the field matched, otherwise the stored value unchanged.
	 *
	 * <p>NOTE(review): stored text is returned without HTML escaping while the
	 * highlighter inserts {@code <span>} markup - confirm the view escapes or
	 * trusts this content.
	 *
	 * @param queryString   the user's search text
	 * @param searchdictory path of the index directory to search
	 * @return the hits collected so far; empty (never {@code null}) when the
	 *         query is null/blank or the search fails
	 */
	public List<SearchResult> seacherStr(String queryString, String searchdictory) {
		List<SearchResult> list = new ArrayList<SearchResult>();
		if (queryString == null || queryString.trim().length() == 0) {
			// Nothing to parse; avoid handing a null/blank string to the query parser.
			return list;
		}
		IndexSearcher searcher = null;
		try {
			searcher = new IndexSearcher(FSDirectory.open(new File(searchdictory)), true);// read-only

			// Build the query with the IK query parser and score with IK similarity.
			Query query = IKQueryParser.parseMultiField(SEARCH_FIELDS, queryString);
			searcher.setSimilarity(new IKSimilarity());

			// Prepare the highlighter.
			Formatter formatter = new SimpleHTMLFormatter("<span class=\"highlighter\">", "</span>");
			Scorer fragmentScorer = new QueryScorer(query);
			Highlighter highlighter = new Highlighter(formatter, fragmentScorer);
			highlighter.setTextFragmenter(new SimpleFragmenter(FRAGMENT_SIZE));
			// One analyzer is enough for all hits; it is only used to re-tokenize stored text.
			Analyzer analyzer = new IKAnalyzer();

			TopDocs topDocs = searcher.search(query, MAX_HITS);
			ScoreDoc[] hits = topDocs.scoreDocs;
			System.out.println("共有" + searcher.maxDoc() + "条索引，命中"
					+ hits.length + "条");
			for (int i = 0; i < hits.length; i++) {
				Document document = searcher.doc(hits[i].doc);

				String content = document.get("content");
				String title = document.get("title");
				String hccontent = bestFragment(highlighter, analyzer, "content", content);
				String hctitle = bestFragment(highlighter, analyzer, "title", title);
				System.out.println("hccontent:" + hccontent);
				System.out.println("hctitle:" + hctitle);

				// No fragment means the field did not match; fall back to the stored text.
				if (hccontent == null) {
					hccontent = content;
				}
				if (hctitle == null) {
					hctitle = title;
				}

				SearchResult rs = new SearchResult();
				rs.setId(document.get("id"));
				rs.setTitle(hctitle);
				rs.setContent(hccontent);
				rs.setAuthor(document.get("author"));
				rs.setTime(document.get("time"));
				list.add(rs);
			}
		} catch (IOException e) {
			// Also covers CorruptIndexException, which is an IOException.
			e.printStackTrace();
		} catch (InvalidTokenOffsetsException e) {
			e.printStackTrace();
		} finally {
			if (searcher != null) {
				// Release the index files opened for this search.
				try {
					searcher.close();
				} catch (IOException e) {
					e.printStackTrace();
				}
			}
		}
		return list;
	}

	/**
	 * Returns the best highlighted fragment of {@code text}, or {@code null}
	 * when {@code text} is {@code null} or contains no query term.
	 */
	private static String bestFragment(Highlighter highlighter, Analyzer analyzer,
			String field, String text) throws IOException, InvalidTokenOffsetsException {
		if (text == null) {
			return null;
		}
		return highlighter.getBestFragment(analyzer, field, text);
	}

}

2.查询结果的实体

package dataFromOracle.entity;

/**
 * One search hit as handed to the view: plain string properties with
 * conventional getters and setters. All properties start out {@code null}.
 */
public class SearchResult {

	/** Identifier of the hit. */
	private String id;

	/** Title text of the hit. */
	private String title;

	/** Content text of the hit. */
	private String content;

	/** Author of the hit. */
	private String author;

	/** Time of the hit, kept as text. */
	private String time;

	/** @return the identifier, or {@code null} if unset */
	public String getId() {
		return this.id;
	}

	/** @param id the identifier to store */
	public void setId(String id) {
		this.id = id;
	}

	/** @return the title, or {@code null} if unset */
	public String getTitle() {
		return this.title;
	}

	/** @param title the title to store */
	public void setTitle(String title) {
		this.title = title;
	}

	/** @return the content, or {@code null} if unset */
	public String getContent() {
		return this.content;
	}

	/** @param content the content to store */
	public void setContent(String content) {
		this.content = content;
	}

	/** @return the author, or {@code null} if unset */
	public String getAuthor() {
		return this.author;
	}

	/** @param author the author to store */
	public void setAuthor(String author) {
		this.author = author;
	}

	/** @return the time text, or {@code null} if unset */
	public String getTime() {
		return this.time;
	}

	/** @param time the time text to store */
	public void setTime(String time) {
		this.time = time;
	}
}

3.servlet

package dataFromOracle.servlet;

import java.io.IOException;
import java.util.List;

import javax.servlet.RequestDispatcher;
import javax.servlet.ServletException;
import javax.servlet.ServletRequest;
import javax.servlet.ServletResponse;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;

import org.apache.lucene.document.Document;

import dataFromOracle.dao.TestLucene;
import dataFromOracle.entity.SearchResult;

/**
 * Servlet that runs a Lucene search for the submitted keywords and forwards
 * the results to {@code /show.jsp}.
 *
 * <p>Request parameters: {@code queryString} (the keywords) and
 * {@code queryType} ({@code blogs}, {@code news}, or anything else for the
 * combined index).
 */
public class SearchServlet extends HttpServlet {

	/** GET requests are handled exactly like POST requests. */
	@Override
	protected void doGet(HttpServletRequest req, HttpServletResponse resp)
			throws ServletException, IOException {
		doPost(req, resp);
	}

	/**
	 * Reads the search parameters, queries the matching index and forwards
	 * the hit list together with the echoed parameters to the result page.
	 */
	@Override
	protected void doPost(HttpServletRequest req, HttpServletResponse resp)
			throws ServletException, IOException {
		req.setCharacterEncoding("utf-8");

		// Keywords typed on the page and the category selector.
		final String queryString = req.getParameter("queryString");
		final String queryType = req.getParameter("queryType");
		System.out.println(queryString);

		final String filedir = "d:\\fileIndex" + indexSubDirectory(queryType);

		// Original author's note: the detailed content could not be displayed.
		final TestLucene tl = new TestLucene();
		final List<SearchResult> list = tl.seacherStr(queryString, filedir);

		req.setAttribute("list", list);
		req.setAttribute("queryString", queryString);
		req.setAttribute("queryType", queryType);

		// Forward (not redirect) so the request attributes reach the page.
		final RequestDispatcher rd = req.getRequestDispatcher("/show.jsp");
		rd.forward(req, resp);
	}

	/**
	 * Maps the category selector to the index sub-directory; a missing or
	 * unrecognised value selects the combined index.
	 */
	private static String indexSubDirectory(String queryType) {
		if ("blogs".equals(queryType)) {
			return "\\blogs";
		}
		if ("news".equals(queryType)) {
			return "\\news";
		}
		return "\\all";
	}

	/** Delegates unchanged to the standard {@link HttpServlet} dispatching. */
	@Override
	public void service(ServletRequest arg0, ServletResponse arg1)
			throws ServletException, IOException {
		super.service(arg0, arg1);
	}

}

4.生成索引——目前是按分类建立索引，我用的是相互分离的索引库（blogs、news、all 各占一个目录）。生成索引这个步骤可以考虑做成定时任务。

package dataFromOracle.build;

import java.io.File;
import java.sql.ResultSet;

import dataFromOracle.dao.TestLucene;

/**
 * Command-line entry point that rebuilds the three search indexes
 * ({@code blogs}, {@code news} and the combined {@code all}) from the
 * database.
 */
public class BuildIndex {

	/** Column list shared by every indexing query: id, title, content, author, time. */
	private static final String SELECT_BLOGS =
			"select t.id,t.title,t.content,t.author,t.time from blogs t";

	private static final String SELECT_NEWS =
			"select t.id,t.title,t.content,t.author,t.time from news t";

	public static void main(String[] args) {
		buildBlogsIndex();
		buildNewsIndex();
		buildAllIndex();
	}

	/** Rebuilds the blogs-only index. */
	private static void buildBlogsIndex() {
		buildIndex(SELECT_BLOGS, "d:\\fileIndex\\blogs");
		System.out.println("buildBlogsIndex完成！");
	}

	/** Rebuilds the news-only index. */
	private static void buildNewsIndex() {
		buildIndex(SELECT_NEWS, "d:\\fileIndex\\news");
		System.out.println("buildNewsIndex完成！");
	}

	/**
	 * Rebuilds the combined index containing both news and blogs.
	 *
	 * <p>Both tables are read through one {@code UNION ALL} query because
	 * {@code TestLucene.Index} opens the directory in CREATE mode: indexing
	 * the tables in two separate passes would make the second pass replace
	 * the first instead of adding to it.
	 */
	private static void buildAllIndex() {
		buildIndex(SELECT_NEWS + " union all " + SELECT_BLOGS, "d:\\fileIndex\\all");
		System.out.println("buildAllIndex完成！");
	}

	/**
	 * Runs {@code sql}, indexes its rows into {@code filedir} and releases the
	 * JDBC resources afterwards.
	 */
	private static void buildIndex(String sql, String filedir) {
		TestLucene tl = new TestLucene();
		// Fetch the rows from the database.
		ResultSet rs = tl.getResult(sql);
		if (rs == null) {
			// getResult already reported the SQL error; leave the existing index alone.
			System.out.println("skip " + filedir + ": query failed");
			return;
		}
		try {
			// Create the index.
			tl.Index(rs, new File(filedir));
		} finally {
			closeQuietly(rs);
		}
	}

	/** Closes the result set and the statement that produced it, reporting any error. */
	private static void closeQuietly(ResultSet rs) {
		try {
			// Fetch the statement first: getStatement() is not usable once rs is closed.
			java.sql.Statement stmt = rs.getStatement();
			rs.close();
			if (stmt != null) {
				stmt.close();
			}
		} catch (java.sql.SQLException e) {
			System.out.println(e);
		}
	}

}

5.数据库表结构--news表和blogs表字段一致

-- Create table
-- BLOGS: source table for the blog index.
-- The indexing queries in this article read ID, TITLE, CONTENT, AUTHOR and TIME;
-- KEYWORD and DIGEST are optional columns that those queries do not select.
create table BLOGS
(
  ID      NUMBER(10) not null,
  TITLE   VARCHAR2(255) not null,
  CONTENT VARCHAR2(1024) not null,
  AUTHOR  VARCHAR2(255) not null,
  TIME    TIMESTAMP(6) not null,
  KEYWORD VARCHAR2(255),
  DIGEST  VARCHAR2(255)
);
-- Create/Recreate primary, unique and foreign key constraints
-- ID is the primary key.
alter table BLOGS
  add primary key (ID);

注：数据库连接的类（dataFromOracle.conn.Conn）没有贴出来。

Lucene.rar (7.6 MB)
下载次数: 63

分享到：

SSH + Lucene + 分页 + 排序 + 高亮模 ... | org.apache.lucene.analysis.TokenStream.i ...

2011-10-25 17:00
浏览 2316
评论(0)
分类:编程语言
查看更多

发表评论

您还没有登录,请您登录后再发表评论

最近访客更多访客>>

博主相关

文章分类

社区版块

存档分类

最新评论

lucene最新版本3.3的基本功能用法（IK分词是3.2.8）

评论

发表评论

相关推荐

最近访客 更多访客>>

博主相关

文章分类

社区版块

存档分类

最新评论

lucene最新版本3.3的基本功能用法（IK分词是3.2.8）

评论

发表评论

相关推荐

Lucene

SSH + Lucene + 分页 + 排序 + 高亮 模拟简单新闻网站搜索引擎

org.apache.lucene.analysis.TokenStream.incrementToken()Z

Lucene多字段搜索

最近访客更多访客>>

SSH + Lucene + 分页 + 排序 + 高亮模拟简单新闻网站搜索引擎