lucene 学习

yupengcc
浏览: 140779 次
性别:
来自: 重庆
最近访客更多访客>>

我啊来了
Linkcyzk
御羽倾城
cocpp
博主相关

博客
微博
相册
留言
关于我
文章分类

社区版块

存档分类

博客分类：
资料搜集
lucene
  1. T_ARTICLE 表
-- Schema for the article table used by this demo.
-- WARNING: the DROP below removes any existing t_article table together with its rows.
DROP TABLE IF EXISTS `t_article`;
CREATE TABLE `t_article` (
  -- Primary key, a string of up to 32 characters; defaults to the empty string.
  `ID` varchar(32) NOT NULL default '',
  -- Title, tag and body are the three columns selected by ArticleDAOImpl.
  `ARTICLE_TITLE` varchar(255) default NULL,
  `ARTICLE_TAG` varchar(255) default NULL,
  `ARTICLE_CONTENT` text,
  PRIMARY KEY  (`ID`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
    建好表后随便添加一些测试数据即可，主要是 article_title 和 article_content 两个字段的数据；article_content 是 text 字段，所以可以直接把网页的文字内容粘贴到该字段中。
 
    2. 业务接口 IArticleService.java
 
package com.jushi.lucene.business;

import java.util.List;
import com.jushi.lucene.entities.Article;
/**
 * Business contract for the article search demo: building the search index
 * and querying it.
 *
 * @Author jushi
 * @CreateDate Jun 4, 2010 1:24:42 PM
 * @Version V 1.0
 */
public interface IArticleService {

	/**
	 * Searches the article index for the given query text.
	 *
	 * @param query the user-supplied search expression
	 * @return the matching articles; callers should tolerate a null or empty
	 *         result when the search cannot be carried out
	 */
	List<Article> getArticles(String query);

	/**
	 * Makes sure a search index is available, building it when necessary.
	 *
	 * @return true when an index is available, false when it could not be built
	 */
	boolean createIndex();

}


   3. 接口的实现 ArticleServiceImpl.java 主要的业务在这里实现：创建索引，搜索数据处理。
 
package com.jushi.lucene.business;

import java.io.File;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

import com.jushi.lucene.dao.IArticleDAO;
import com.jushi.lucene.entities.Article;

/**
 * Lucene-backed {@link IArticleService}: writes the articles supplied by the
 * DAO into a file-system index and answers keyword searches over the
 * "title" and "content" fields, returning highlighted fragments.
 *
 * NOTE(review): the index location is hard-coded to a Windows drive path;
 * consider making it configurable.
 *
 * @Author jushi
 * @CreateDate Jun 4, 2010 1:25:00 PM
 * @Version V 1.0
 */
public class ArticleServiceImpl implements IArticleService {

	/** Directory that holds the Lucene index files. */
	private static final String INDEXPATH = "g:\\index";

	/** Maximum number of hits fetched for a single search. */
	private static final int MAX_HITS = 1000;

	/** Maximum size of a highlighted fragment, as passed to SimpleFragmenter. */
	private static final int FRAGMENT_SIZE = 100;

	/** Index fields that are searched; a match in either one is enough. */
	private static final String[] SEARCH_FIELDS = { "title", "content" };

	private IArticleDAO articleDAO;

	/** Single analyzer shared by indexing, query parsing and highlighting so that all three tokenize alike. */
	private final Analyzer analyzer = new StandardAnalyzer();

	/**
	 * Searches the index for {@code query} in the title and content fields.
	 *
	 * Each returned article carries the highlighted title/content fragment (or
	 * the stored text when nothing in that field matched), the stored tag and
	 * the total hit count of the search.
	 *
	 * @param query the search expression in Lucene query syntax
	 * @return the matching articles, at most {@code MAX_HITS}; an empty list
	 *         when nothing matches or when the search fails (the failure is
	 *         printed to stderr), never null
	 */
	public List<Article> getArticles(String query) {

		List<Article> qlist = new ArrayList<Article>();
		IndexSearcher indexSearcher = null;

		try {
			indexSearcher = new IndexSearcher(INDEXPATH);

			System.out.println(">>> 2.开始读取索引... ... 通过关键字：【 "+ query +" 】");
			long begin = System.currentTimeMillis();

			// Search both fields; SHOULD on each means a hit in either field qualifies.
			BooleanClause.Occur[] clauses = { BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD };
			Query queryOBJ = MultiFieldQueryParser.parse(query, SEARCH_FIELDS, clauses, analyzer);
			Filter filter = null;

			// Fetch the best-scoring documents.
			TopDocs topDocs = indexSearcher.search(queryOBJ, filter, MAX_HITS);
			System.out.println("*** 共匹配：" + topDocs.totalHits + "个 ***");

			// The highlighter depends only on the query, so build it once for all hits.
			SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter("<font color='red'><strong>", "</strong></font>");
			Highlighter highlighter = new Highlighter(simpleHTMLFormatter, new QueryScorer(queryOBJ));
			highlighter.setTextFragmenter(new SimpleFragmenter(FRAGMENT_SIZE));

			for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
				Document targetDoc = indexSearcher.doc(scoreDoc.doc);

				Article article = new Article();
				article.setTitle(highlight(highlighter, "title", targetDoc.get("title")));
				article.setContent(highlight(highlighter, "content", targetDoc.get("content")));
				article.setTag(targetDoc.get("tag"));
				article.setTotalHits(topDocs.totalHits);

				qlist.add(article);
			}

			long end = System.currentTimeMillis();
			System.out.println(">>> 3.搜索完毕... ... 共花费：" + (end - begin) +"毫秒...");

			return qlist;

		} catch (Exception e) {
			e.printStackTrace();
			// Discard partial results; an empty list lets callers render "no results" safely.
			return new ArrayList<Article>();
		} finally {
			// Always release the index files, even when parsing or searching failed.
			if (indexSearcher != null) {
				try {
					indexSearcher.close();
				} catch (Exception closeFailure) {
					closeFailure.printStackTrace();
				}
			}
		}
	}

	/**
	 * Returns the best highlighted fragment of {@code text} for the current query.
	 *
	 * @param highlighter highlighter configured for the current query
	 * @param fieldName   index field the text was stored under
	 * @param text        stored field value, may be null when the field is absent
	 * @return the highlighted fragment, the unmodified text when nothing in it
	 *         matched, or an empty string when {@code text} is null
	 * @throws Exception when tokenizing or highlighting fails; declared broadly
	 *         because the caller handles every failure the same way
	 */
	private String highlight(Highlighter highlighter, String fieldName, String text) throws Exception {
		if (text == null) {
			return "";
		}
		TokenStream tokenStream = analyzer.tokenStream(fieldName, new StringReader(text));
		String fragment = highlighter.getBestFragment(tokenStream, text);
		return fragment == null ? text : fragment;
	}

	/**
	 * Builds the index from all articles returned by the DAO, unless the index
	 * directory already contains files.
	 *
	 * NOTE(review): an index left half-written by a failed run is still seen as
	 * "existing" on the next call; confirm whether that is acceptable.
	 *
	 * @return true when an index already exists or was built successfully,
	 *         false when building failed (the failure is printed to stderr)
	 */
	public boolean createIndex()
	{
		// Nothing to do when an index is already present.
		if (isIndexExisted()) {
			return true;
		}

		List<Article> list = articleDAO.getArticles();

		IndexWriter indexWriter = null;
		try
		{
			Directory directory = FSDirectory.getDirectory(INDEXPATH);
			indexWriter = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);

			long begin = System.currentTimeMillis();
			for (Article art : list)
			{
				// Null column values are indexed as empty strings.
				String title = art.getTitle() == null ? "" : art.getTitle().trim();
				String content = art.getContent() == null ? "" : art.getContent();
				String tag = art.getTag() == null ? "" : art.getTag();

				Document doc = new Document();
				doc.add(new Field("title", title, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.YES));
				doc.add(new Field("content", content, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.YES));
				// The tag is only stored for display; it is not searchable.
				doc.add(new Field("tag", tag, Field.Store.COMPRESS, Field.Index.NO));
				indexWriter.addDocument(doc);
			}
			long end = System.currentTimeMillis();
			System.out.println(">>> 1.存入索引完毕.. 共花费：" + (end - begin) +"毫秒...");

			indexWriter.optimize();
			indexWriter.close();
			indexWriter = null; // closed cleanly, nothing left for the finally block
			return true;

		} catch (Exception e) {
			e.printStackTrace();
			return false;
		} finally {
			// Close the writer after a failure so its write lock is released.
			if (indexWriter != null) {
				try {
					indexWriter.close();
				} catch (Exception closeFailure) {
					closeFailure.printStackTrace();
				}
			}
		}
	}

	/**
	 * Checks whether the index directory exists and contains at least one entry.
	 *
	 * @return true when the directory holds files, false when it is missing,
	 *         empty, not a directory, or not readable
	 */
	private boolean isIndexExisted()
	{
		try
		{
			// listFiles() yields null for a missing path or a non-directory.
			File[] entries = new File(INDEXPATH).listFiles();
			return entries != null && entries.length > 0;

		} catch (SecurityException e) {
			e.printStackTrace();
			return false;
		}
	}

	/**
	 * Injects the DAO that supplies the articles to index.
	 *
	 * @param articleDAO the article DAO
	 */
	public void setArticleDAO(IArticleDAO articleDAO) {
		this.articleDAO = articleDAO;
	}

}

    4. DAO 接口及 DAO实现　
 
/**
 * Data-access contract for articles.
 */
public interface IArticleDAO {

	/**
	 * Loads the articles from the underlying store.
	 *
	 * @return the loaded articles
	 */
	List<Article> getArticles();
}
 
 
/**
 * JDBC implementation of {@link IArticleDAO} built on JdbcDaoSupport.
 */
public class ArticleDAOImpl extends JdbcDaoSupport implements IArticleDAO{

	/** Selects the title, tag and content columns of every row in t_article. */
	private static final String SELECT_ARTICLES = "SELECT article_title, article_tag, article_content FROM t_article";

	/**
	 * Runs the article query and maps each row with an ArticleRowMapper.
	 *
	 * @return one Article per row of t_article
	 */
	public List<Article> getArticles() {
		ArticleRowMapper rowMapper = new ArticleRowMapper();
		Object rows = this.getJdbcTemplate().query(SELECT_ARTICLES, rowMapper);
		// The mapper only ever produces Article instances, so the cast is safe.
		return (List<Article>) rows;
	}

}
 
   5. 上面DAO需要的 ArticleRowMapper: 这里做了下小改动
 
package com.jushi.lucene.rowmapper;

import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.sql.SQLException;

import org.springframework.jdbc.core.RowMapper;

import com.jushi.lucene.entities.Article;

/**
 * Maps one result-set row onto an {@link Article} by inspecting the column
 * names, so the select list may contain the article columns in any order.
 *
 * @Author jushi
 * @CreateDate Jun 4, 2010 1:31:54 PM
 * @Version V 1.0
 */
public class ArticleRowMapper implements RowMapper {

	/**
	 * Builds an Article from the current row.
	 *
	 * Columns named article_title, article_content and article_tag (compared
	 * case-insensitively) fill the matching properties; every other column is
	 * ignored.
	 *
	 * @param rs     result set positioned on the row to map
	 * @param rowNum index of the current row (unused)
	 * @return the populated Article
	 * @throws SQLException when reading the metadata or a column value fails
	 */
	public Object mapRow(ResultSet rs, int rowNum) throws SQLException {
		Article article = new Article();
		ResultSetMetaData meta = rs.getMetaData();
		int colNum = meta.getColumnCount();
		for (int i = 1; i <= colNum; i++) {
			// equalsIgnoreCase is locale-independent; toLowerCase() with the default
			// locale turns "I" into a dotless i under e.g. a Turkish locale and
			// would make every comparison below fail.
			String colName = meta.getColumnName(i);
			if ("article_title".equalsIgnoreCase(colName)) {
				article.setTitle(rs.getString(i));
			} else if ("article_content".equalsIgnoreCase(colName)) {
				article.setContent(rs.getString(i));
			} else if ("article_tag".equalsIgnoreCase(colName)) {
				article.setTag(rs.getString(i));
			}
		}
		return article;
	}
}
 
   6. Article 实体 添加了两个属性: TotalHits, TotalTime
 
package com.jushi.lucene.entities;

/**
 * @Author jushi
 * @CreateDate Jun 4, 2010 1:18:48 PM
 * @Version V 1.0
 */
public class Article {

	private String title;
	private String content;
	private String tag;
	private int TotalHits; //命中率
	private int TotalTime; //花费时间
	
	public int getTotalTime() {
		return TotalTime;
	}
	public void setTotalTime(int totalTime) {
		TotalTime = totalTime;
	}
	public String getTitle() {
		return title;
	}
	public void setTitle(String title) {
		this.title = title;
	}
	public String getContent() {
		return content;
	}
	public void setContent(String content) {
		this.content = content;
	}
	public String getTag() {
		return tag;
	}
	public void setTag(String tag) {
		this.tag = tag;
	}
	public int getTotalHits() {
		return TotalHits;
	}
	public void setTotalHits(int totalHits) {
		TotalHits = totalHits;
	}
	
}
 
   7. Spring 的配置如下：
   --- applicationContext-lucene.xml
 
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE beans PUBLIC "-//SPRING//DTD BEAN 2.0//EN" "http://www.springframework.org/dtd/spring-beans-2.0.dtd">
<beans>

<!-- Article DAO; its dataSource property refers to the articleDataSource bean, which is not declared in this file. -->
<bean id="articleDAO" class="com.jushi.lucene.dao.ArticleDAOImpl" scope="singleton" >  
	<property name="dataSource" ref="articleDataSource"></property>  
</bean>

<!-- Article service, looked up by IndexAction under the bean name "articleService"; receives the DAO above. -->
<bean id="articleService" class="com.jushi.lucene.business.ArticleServiceImpl" scope="singleton" >  
	<property name="articleDAO" ref="articleDAO"></property> 
</bean> 
  
</beans> 
   ---  applicationContext-lucene-datasource.xml
 
 
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE beans PUBLIC "-//SPRING//DTD BEAN 2.0//EN" "http://www.springframework.org/dtd/spring-beans-2.0.dtd">
<beans>

<!-- Pooled MySQL data source referenced by the articleDAO bean; close() is invoked when the bean is destroyed. -->
<!-- NOTE(review): user name and password are stored here in plain text; consider moving them out of the file. -->
<bean id="articleDataSource" class="org.apache.commons.dbcp.BasicDataSource" destroy-method="close">
	<property name="driverClassName" value="com.mysql.jdbc.Driver" ></property>
	<!-- Database "companyweb" on localhost, with UTF-8 requested through the URL parameters. -->
	<property name="url" value="jdbc:mysql://localhost/companyweb?useUnicode=true&amp;characterEncoding=utf-8" />
	<property name="username" value="root" />
	<property name="password" value="jushi" />
	<!-- Pool sizing values. -->
	<property name="initialSize" value="5" />
	<property name="maxActive" value="20" />
	<property name="maxIdle" value="20" />
	<property name="minIdle" value="5" />
</bean> 

</beans> 
 
   8. action配置 struts-config.xml
 
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE struts-config PUBLIC "-//Apache Software Foundation//DTD Struts Configuration 1.2//EN" "http://struts.apache.org/dtds/struts-config_1_2.dtd">

<struts-config>

	<!-- Form bean used by the /index action below. -->
	<form-beans>
	   <form-bean name="lazyForm" type="org.apache.struts.validator.LazyValidatorForm"/>
    </form-beans>
    <action-mappings>
    
    	<!-- /index is handled by IndexAction (a DispatchAction); the request parameter "m" names the method to invoke. -->
    	<!-- The "result" forward used by IndexAction resolves to /result.jsp. -->
    	<action path="/index"
	            type="com.jushi.lucene.businessview.IndexAction"
	            name="lazyForm" parameter="m" scope="request" validate="false">
				<forward name="result" path="/result.jsp"></forward>
		</action>

	</action-mappings>
</struts-config>
 
    9. web.xml 文件配置: 监听、加载配置文件
 
 
<?xml version="1.0" encoding="UTF-8"?>
<web-app version="2.5" 
	xmlns="http://java.sun.com/xml/ns/javaee" 
	xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" 
	xsi:schemaLocation="http://java.sun.com/xml/ns/javaee 
	http://java.sun.com/xml/ns/javaee/web-app_2_5.xsd">
	
	<!-- Spring context files: every applicationContext*.xml under springConfig on the classpath. -->
	<context-param>
		<param-name>contextConfigLocation</param-name>
		<param-value>classpath*:/springConfig/applicationContext*.xml</param-value>
	</context-param>
	
	<!-- Character encoding filter: UTF-8, mapped to every URL. -->
	<filter>
          <filter-name>encodingFilter</filter-name>
          <filter-class>org.springframework.web.filter.CharacterEncodingFilter</filter-class>
          <init-param>
                   <param-name>encoding</param-name>
                   <param-value>UTF-8</param-value>
          </init-param>
     </filter>
     <filter-mapping>
          <filter-name>encodingFilter</filter-name>
          <url-pattern>/*</url-pattern>
     </filter-mapping>
     
     <!-- Spring context loader; it uses the contextConfigLocation parameter declared above. -->
     <listener>
		<listener-class>org.springframework.web.context.ContextLoaderListener</listener-class>
	</listener>
	
	<!-- Struts front controller, configured from /WEB-INF/struts-config.xml. -->
	<servlet>
		<servlet-name>action</servlet-name>
		<servlet-class>org.apache.struts.action.ActionServlet</servlet-class>
		<init-param>
			<param-name>config</param-name>
			<param-value>/WEB-INF/struts-config.xml</param-value>
		</init-param>
	</servlet>

	<!-- Requests ending in .html are routed to the Struts servlet. -->
	<servlet-mapping>
		<servlet-name>action</servlet-name>
		<url-pattern>*.html</url-pattern>
	</servlet-mapping>
	
  <welcome-file-list>
    <welcome-file>index.jsp</welcome-file>
  </welcome-file-list>
</web-app>
 
   10. 前端 View 两个jsp index.jsp和 result.jsp
 
     index.jsp
<%@ page language="java" import="java.util.*" pageEncoding="utf-8"%>
<%
String path = request.getContextPath();
String basePath = request.getScheme()+"://"+request.getServerName()+":"+request.getServerPort()+path+"/";
%>

<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
  <head>
    <base href="<%=basePath%>">
    
    <title>简单的lucene测试</title>
    <script type="text/javascript" src="js/CheckFunction.js"></script>
	<script type="text/javascript">
		/**
		 * Validates the keyword field of form1 and, when it is not blank,
		 * posts the form to index.html with m=index.
		 * Returns false when the keyword is blank; otherwise returns nothing.
		 */
		function _search()
		{
			var searchForm = document.form1;
			// Trim is not defined in this page; presumably it comes from js/CheckFunction.js.
			var keyword = Trim(searchForm.title.value);
			if(keyword == '')
			{
				alert("输入一个关键字吧！");
				return false;
			}
			searchForm.action = 'index.html?m=index';
			searchForm.submit();
		}
	</script>
  </head>
  
  <body>
    	
    	<center><h1>测试Lucene</h1>
    	<hr/>
    	<form name="form1" action="index.html" method="post">
    	请输入文章的标题关键字：<input type="text" maxlength="20" name="title" size="40"/>&nbsp;
    	<button onclick="javascript:_search();return false;" style="border: thin;border-color: blue" name="search"> GO </button>
    	</form>
    	</center>
  </body>
</html>
 
    result.jsp
 
<%@ page language="java" import="java.util.*" pageEncoding="utf-8"%>
<%@ page import="com.jushi.lucene.entities.*"%>
<%
String path = request.getContextPath();
String basePath = request.getScheme()+"://"+request.getServerName()+":"+request.getServerPort()+path+"/";
%>
<%
	// Read the search results published by the action. The attribute may be
	// missing or null (for example when the page is opened directly), so fall
	// back to an empty list before anything dereferences it.
	List<Article> list = (List<Article>) request.getAttribute("articlelist");
	if(list == null)
		list = new ArrayList<Article>();

	// Elapsed search time as formatted by the action; "0.000" when absent.
	String TotalTime = "0.000";
	Object totalTimeAttr = request.getAttribute("totalTime");
	if(totalTimeAttr != null)
		TotalTime = totalTimeAttr.toString();

	// Every article carries the total hit count of the search, so the first one is enough.
	int TotalHits = 0;
	if(!list.isEmpty())
		TotalHits = list.get(0).getTotalHits();
%>
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
  <head>
    <base href="<%=basePath%>">
   	<title>搜索的结果如下：</title>
   	<style type="text/css">
   		.Fee {
   			width: 650px;
   			height:28px;
   			float: right;
   			padding:1px;
   			overflow:hidden;
   			border-bottom:1px solid #bde2ff;
   			line-height:28px;
   			font-family: '宋体';
   			font-size: 15px;
   			font-weight: bold;
   		}
   		.Fee span {
   			color: red;
   			font-size:14px;
   		}
   		
   	</style>
  </head>
  
  <body>
  	<div class="Fee">共搜到 <span><%=TotalHits %></span> 条结果. 共花费：<span><%=TotalTime %> </span>秒.</div><br/><br/>
  	
    <table width="80%" height="70%" border="1" cellpadding="0" cellspacing="0" align="center">
    		<tr height="20" bgcolor="#CCCCCC">
    		<td align="center" width="20%">标题</td>
    		<td align="center" width="60%">文章内容</td>
    		<td align="center" width="20%">所属标签</td>
    		</tr>
    		<%
    		if(list.size()>0){
    		for(Article art : list){
    			String title = art.getTitle();
    			String content = art.getContent();
    			String tag = art.getTag();
    		%>
    		<tr>
    			<td><%=title %></td>
    			<td><%=content %></td>
    			<td><%=tag %></td>
    		</tr>
    		<%}}else{ %>
    		<tr>
    			<td colspan="3" align="center"><h3>对不起...没有搜到相关信息..请尝试其他关键字!</h3></td>
    		</tr>
    		<%} %>
    	</table>
    	<br/>
    	<center><a href="<%=basePath %>./">返回上一级</a></center>
  </body>
</html>
 
    11. action：IndexAction.java
 
package com.jushi.lucene.businessview;

import java.text.DecimalFormat;
import java.util.Collections;
import java.util.Date;
import java.util.List;

import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;

import org.apache.struts.action.ActionForm;
import org.apache.struts.action.ActionForward;
import org.apache.struts.action.ActionMapping;
import org.apache.struts.actions.DispatchAction;
import org.springframework.web.context.WebApplicationContext;
import org.springframework.web.context.support.WebApplicationContextUtils;

import com.jushi.lucene.business.IArticleService;
import com.jushi.lucene.entities.Article;

/**
 * Struts dispatch action behind the search form: ensures the index exists,
 * runs the search and forwards to the result page.
 *
 * @Author fenglong
 * @CreateDate Jun 4, 2010 1:50:02 PM
 * @Version V 1.0
 * @CopyRight 2010 jushi
 */
public class IndexAction extends DispatchAction {

	/**
	 * Searches for the keyword sent in the "title" request parameter.
	 *
	 * The request attributes "articlelist" (never null) and "totalTime"
	 * (elapsed search time in seconds, formatted as #0.000) are always set, so
	 * the result page can render even when the index is unavailable, the
	 * keyword is blank, or the search fails.
	 *
	 * @param mapping  the action mapping, used to resolve the "result" forward
	 * @param form     the form bean (unused)
	 * @param request  the current request
	 * @param response the current response (unused)
	 * @return the "result" forward
	 * @throws Exception when the Spring context or the service bean cannot be obtained
	 */
	public ActionForward index(ActionMapping mapping, ActionForm form,
			HttpServletRequest request, HttpServletResponse response)
			throws Exception {

		WebApplicationContext wac = WebApplicationContextUtils.getRequiredWebApplicationContext(this.getServlet().getServletContext());
		IArticleService articleService = (IArticleService) wac.getBean("articleService");

		String q = request.getParameter("title");
		boolean hasKeyword = q != null && q.trim().length() > 0;

		List<Article> list = Collections.emptyList();
		double seconds = 0d;

		// Build the index first (as before, on every request) and search only
		// when there is something to search for.
		boolean indexReady = articleService.createIndex();
		if (indexReady && hasKeyword) {

			long begin = System.currentTimeMillis();
			List<Article> found = articleService.getArticles(q);
			long end = System.currentTimeMillis();
			seconds = (end - begin) / 1000.0;

			// Guard against service implementations that report failure as null.
			if (found != null) {
				list = found;
			}
		}

		// DecimalFormat is not thread-safe, so a fresh instance is used per request.
		request.setAttribute("totalTime", new DecimalFormat("#0.000").format(seconds));
		request.setAttribute("articlelist", list);

		return mapping.findForward("result");
	}

}
分享到：
用Lucene构建实时索引的文档更新问题 | jquery-autocomplete学习
2011-11-04 08:57
浏览 955
评论(0)
分类:行业应用
查看更多
发表评论

您还没有登录,请您登录后再发表评论
最近访客更多访客>>

博主相关

文章分类

社区版块

存档分类

最新评论

lucene 学习

评论

发表评论

相关推荐

最近访客 更多访客>>

博主相关

文章分类

社区版块

存档分类

最新评论

lucene 学习

评论

发表评论

相关推荐

linux重启oracle 各种方法

SVN服务器配置

网页编码就是那点事

Tomcat 配置 CAS 的一个问题。unable to find valid certification path to requested target

Tomcat SSL配置大全

Html中td自动换行问题

oracle 全角、半角转换，和oracle 的 DUMP函数

tomcat sso 配置

Hibernate 利用配置文件配置HQL/SQL

CAS工作流程

jforum 表目录

Jfroum集成cas单点登录

JForum登录用例

tinyMCE 详细文档

JBPM3.1.4 相关表的 意义

JBPM环境配置

xfire 编写webservice例子

ArrayUtils常用的用法

Tomcat自动缓存context.xml，导致在eclipse中修改后，不能立即启作用

JavaScript CSS Style属性对照表

最近访客更多访客>>

JBPM3.1.4 相关表的意义