`
yupengcc
  • 浏览: 137443 次
  • 性别: Icon_minigender_1
  • 来自: 重庆
社区版块
存档分类
最新评论

lucene 学习

阅读更多

 

  1. T_ARTICLE 表
-- Recreates the article table from scratch: an existing `t_article` is dropped first.
DROP TABLE IF EXISTS `t_article`;
CREATE TABLE `t_article` (
  -- Primary key, a string of at most 32 characters.
  `ID` varchar(32) NOT NULL default '',
  -- Title and tag are optional short strings.
  `ARTICLE_TITLE` varchar(255) default NULL,
  `ARTICLE_TAG` varchar(255) default NULL,
  -- Full article body.
  `ARTICLE_CONTENT` text,
  PRIMARY KEY  (`ID`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
    表建好后随便添加一些测试数据即可,主要是 article_title 和 article_content 两个字段;article_content 是 text 类型字段,可以直接把网页上的文字内容粘贴进去。
 
    2. 业务接口 IArticleService.java
 
package com.jushi.lucene.business;

import java.util.List;
import com.jushi.lucene.entities.Article;
/**
 * @Author jushi
 * @CreateDate Jun 4, 2010 1:24:42 PM
 * @Version V 1.0
 */

public interface IArticleService {

	/**
	 * Looks up articles for the given search text.
	 *
	 * @param query the search text entered by the user
	 * @return the articles found for {@code query}
	 */
	List<Article> getArticles(String query);

	/**
	 * Creates the article index.
	 *
	 * @return the success flag reported by the implementation
	 */
	boolean createIndex();

}


   3. 接口的实现 ArticleServiceImpl.java 主要的业务在这里实现:创建索引,搜索数据处理。
 
package com.jushi.lucene.business;

import java.io.File;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

import com.jushi.lucene.dao.IArticleDAO;
import com.jushi.lucene.entities.Article;

/**
 * @Author jushi
 * @CreateDate Jun 4, 2010 1:25:00 PM
 * @Version V 1.0
 */
/**
 * Lucene-backed implementation of {@link IArticleService}.
 *
 * <p>{@link #createIndex()} builds a file-system index under {@code INDEXPATH}
 * from the articles returned by the DAO; {@link #getArticles(String)} searches
 * that index over the title and content fields and returns the hits with the
 * matching terms wrapped in highlight markup.
 */
public class ArticleServiceImpl implements IArticleService {

	/** Index field holding the article title. */
	private static final String FIELD_TITLE = "title";
	/** Index field holding the article body. */
	private static final String FIELD_CONTENT = "content";
	/** Index field holding the article tag (stored, not indexed). */
	private static final String FIELD_TAG = "tag";
	/** Upper bound on the number of hits fetched per search. */
	private static final int MAX_RESULTS = 1000;
	/** Fragment size handed to the highlighter's SimpleFragmenter. */
	private static final int FRAGMENT_SIZE = 100;

	private IArticleDAO articleDAO;

	private final String INDEXPATH = "g:\\index";
	private Analyzer analyzer = new StandardAnalyzer();

	/**
	 * Searches the index for {@code query} in both the title and the content
	 * field (either field may match) and returns the hits with highlighting
	 * applied to title and content.
	 *
	 * @param query the query string, parsed with Lucene query syntax
	 * @return the matching articles; an empty list when nothing matches or
	 *         when the search fails (the failure is printed to stderr)
	 */
	public List<Article> getArticles(String query) {

		List<Article> qlist = new ArrayList<Article>();
		IndexSearcher indexSearcher = null;
		try {
			indexSearcher = new IndexSearcher(INDEXPATH);

			System.out.println(">>> 2.开始读取索引... ... 通过关键字:【 "+ query +" 】");
			long begin = System.currentTimeMillis();

			// Search title and content; a hit in either field is enough.
			BooleanClause.Occur[] clauses = { BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD };
			Query queryOBJ = MultiFieldQueryParser.parse(
					query, new String[] { FIELD_TITLE, FIELD_CONTENT }, clauses, analyzer);
			Filter filter = null;

			// Fetch the best-scoring hits, capped at MAX_RESULTS.
			TopDocs topDocs = indexSearcher.search(queryOBJ, filter, MAX_RESULTS);
			System.out.println("*** 共匹配:" + topDocs.totalHits + "个 ***");

			// The highlighter depends only on the query, so build it once.
			SimpleHTMLFormatter simpleHTMLFormatter =
					new SimpleHTMLFormatter("<font color='red'><strong>", "</strong></font>");
			Highlighter highlighter = new Highlighter(simpleHTMLFormatter, new QueryScorer(queryOBJ));
			highlighter.setTextFragmenter(new SimpleFragmenter(FRAGMENT_SIZE));

			for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
				Document targetDoc = indexSearcher.doc(scoreDoc.doc);

				Article article = new Article();
				article.setTitle(highlight(highlighter, FIELD_TITLE, targetDoc.get(FIELD_TITLE)));
				article.setContent(highlight(highlighter, FIELD_CONTENT, targetDoc.get(FIELD_CONTENT)));
				article.setTag(targetDoc.get(FIELD_TAG));
				// Every result carries the overall hit count so the view can read it from any row.
				article.setTotalHits(topDocs.totalHits);

				qlist.add(article);
			}

			long end = System.currentTimeMillis();
			System.out.println(">>> 3.搜索完毕... ... 共花费:" + (end - begin) +"毫秒...");

			return qlist;

		} catch (Exception e) {
			e.printStackTrace();
			// Callers iterate the result without a null check, so report failure as "no hits".
			return new ArrayList<Article>();
		} finally {
			// Always release the index files, including when parsing or searching failed.
			if (indexSearcher != null) {
				try {
					indexSearcher.close();
				} catch (Exception closeError) {
					closeError.printStackTrace();
				}
			}
		}
	}

	/**
	 * Returns the best highlighted fragment of {@code text} for the given
	 * field, or {@code text} itself when the highlighter finds nothing to mark.
	 *
	 * @param highlighter the highlighter configured for the current query
	 * @param fieldName   the index field the text was stored under
	 * @param text        the stored field value; may be {@code null}
	 * @return the highlighted fragment, the original text, or {@code null}
	 *         when {@code text} is {@code null}
	 * @throws Exception if tokenizing or highlighting the text fails
	 */
	private String highlight(Highlighter highlighter, String fieldName, String text) throws Exception {
		if (text == null) {
			// A document without this stored field has nothing to highlight.
			return null;
		}
		TokenStream tokenStream = analyzer.tokenStream(fieldName, new StringReader(text));
		String fragment = highlighter.getBestFragment(tokenStream, text);
		return fragment == null ? text : fragment;
	}

	/**
	 * Builds the index from all articles returned by the DAO, unless the index
	 * directory already contains files.
	 *
	 * <p>NOTE(review): if indexing fails part-way, files may be left in the
	 * index directory and the next call will treat the index as existing.
	 *
	 * @return {@code true} when the index already existed or was built
	 *         successfully, {@code false} when building it failed
	 */
	public boolean createIndex()
	{
		// Nothing to do when an index is already present.
		if (this.isIndexExisted()) {
			return true;
		}

		List<Article> list = articleDAO.getArticles();

		IndexWriter indexWriter = null;
		try
		{
			Directory directory = FSDirectory.getDirectory(INDEXPATH);
			indexWriter = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);

			long begin = System.currentTimeMillis();
			for (Article art : list)
			{
				Document doc = new Document();
				String title = art.getTitle() == null ? "" : art.getTitle().trim();
				String content = art.getContent() == null ? "" : art.getContent();
				String tag = art.getTag() == null ? "" : art.getTag();
				doc.add(new Field(FIELD_TITLE, title, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.YES));
				doc.add(new Field(FIELD_CONTENT, content, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.YES));
				doc.add(new Field(FIELD_TAG, tag, Field.Store.COMPRESS, Field.Index.NO));
				indexWriter.addDocument(doc);
			}
			long end = System.currentTimeMillis();
			System.out.println(">>> 1.存入索引完毕.. 共花费:" + (end - begin) +"毫秒...");

			indexWriter.optimize();
			indexWriter.close();
			// Closed successfully; keep the finally block from closing it again.
			indexWriter = null;
			return true;

		} catch (Exception e) {
			e.printStackTrace();
			return false;
		} finally {
			// On failure the writer still holds the index write lock; release it.
			if (indexWriter != null) {
				try {
					indexWriter.close();
				} catch (Exception closeError) {
					closeError.printStackTrace();
				}
			}
		}
	}

	/**
	 * Checks whether the index directory exists and contains at least one file.
	 *
	 * @return {@code true} when the index directory is non-empty
	 */
	private boolean isIndexExisted()
	{
		try
		{
			// listFiles() returns null when the path is missing or not a directory.
			File[] entries = new File(INDEXPATH).listFiles();
			return entries != null && entries.length > 0;
		} catch (SecurityException e) {
			e.printStackTrace();
			return false;
		}
	}

	public void setArticleDAO(IArticleDAO articleDAO) {
		this.articleDAO = articleDAO;
	}

}

    4. DAO 接口及 DAO实现 
 
public interface IArticleDAO {

	/**
	 * Fetches articles from the underlying store.
	 *
	 * @return the articles provided by the implementation
	 */
	List<Article> getArticles();
}
 
 
public class ArticleDAOImpl extends JdbcDaoSupport implements IArticleDAO{

	/**
	 * Reads the title, tag and content columns of every row in
	 * {@code t_article} and maps each row with {@link ArticleRowMapper}.
	 *
	 * @return one {@link Article} per row of {@code t_article}
	 */
	public List<Article> getArticles() {
		final String selectAllArticles = "SELECT article_title, article_tag, article_content FROM t_article";
		final ArticleRowMapper rowMapper = new ArticleRowMapper();
		// The template returns an untyped List; every element comes from ArticleRowMapper.
		List<Article> articles = (List<Article>) this.getJdbcTemplate().query(selectAllArticles, rowMapper);
		return articles;
	}

}
 
   5. 上面DAO需要的 ArticleRowMapper: 这里做了下小改动
 
package com.jushi.lucene.rowmapper;

import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.sql.SQLException;

import org.springframework.jdbc.core.RowMapper;

import com.jushi.lucene.entities.Article;

/**
 * @Author jushi
 * @CreateDate Jun 4, 2010 1:31:54 PM
 * @Version V 1.0
 */
/**
 * Maps one result-set row onto an {@link Article}.
 *
 * <p>Columns are matched by name rather than position, so the mapper works
 * for any column order and ignores columns it does not know.
 */
public class ArticleRowMapper implements RowMapper {

	/**
	 * Builds an {@link Article} from the current row.
	 *
	 * @param rs     the result set, positioned on the row to map
	 * @param rowNum the index of the current row (unused)
	 * @return a new {@link Article} populated from the recognized columns
	 * @throws SQLException if reading the metadata or a column value fails
	 */
	public Object mapRow(ResultSet rs, int rowNum) throws SQLException {
		Article article = new Article();
		ResultSetMetaData meta = rs.getMetaData();
		int colNum = meta.getColumnCount();
		// JDBC column indexes are 1-based.
		for (int i = 1; i <= colNum; i++) {
			String colName = meta.getColumnName(i);
			// equalsIgnoreCase is locale-independent, unlike the default-locale
			// toLowerCase(), which maps 'I' to a dotless i under e.g. Turkish.
			if ("article_title".equalsIgnoreCase(colName)) {
				article.setTitle(rs.getString(i));
			} else if ("article_content".equalsIgnoreCase(colName)) {
				article.setContent(rs.getString(i));
			} else if ("article_tag".equalsIgnoreCase(colName)) {
				article.setTag(rs.getString(i));
			}
		}
		return article;
	}
}
 
   6. Article 实体 添加了两个属性: TotalHits, TotalTime
 
package com.jushi.lucene.entities;

/**
 * @Author jushi
 * @CreateDate Jun 4, 2010 1:18:48 PM
 * @Version V 1.0
 */
public class Article {

	private String title;
	private String content;
	private String tag;
	private int TotalHits; //命中率
	private int TotalTime; //花费时间
	
	public int getTotalTime() {
		return TotalTime;
	}
	public void setTotalTime(int totalTime) {
		TotalTime = totalTime;
	}
	public String getTitle() {
		return title;
	}
	public void setTitle(String title) {
		this.title = title;
	}
	public String getContent() {
		return content;
	}
	public void setContent(String content) {
		this.content = content;
	}
	public String getTag() {
		return tag;
	}
	public void setTag(String tag) {
		this.tag = tag;
	}
	public int getTotalHits() {
		return TotalHits;
	}
	public void setTotalHits(int totalHits) {
		TotalHits = totalHits;
	}
	
}
 
   7. Spring 的配置如下:
   --- applicationContext-lucene.xml
 
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE beans PUBLIC "-//SPRING//DTD BEAN 2.0//EN" "http://www.springframework.org/dtd/spring-beans-2.0.dtd">
<!-- Wires the article DAO and the article service. -->
<beans>

<!-- DAO bean; its dataSource property refers to a bean named articleDataSource, which is not defined in this file. -->
<bean id="articleDAO" class="com.jushi.lucene.dao.ArticleDAOImpl" scope="singleton" >  
	<property name="dataSource" ref="articleDataSource"></property>  
</bean>

<!-- Service bean; receives the DAO above through its articleDAO setter. -->
<bean id="articleService" class="com.jushi.lucene.business.ArticleServiceImpl" scope="singleton" >  
	<property name="articleDAO" ref="articleDAO"></property> 
</bean> 
  
</beans> 
   ---  applicationContext-lucene-datasource.xml
 
 
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE beans PUBLIC "-//SPRING//DTD BEAN 2.0//EN" "http://www.springframework.org/dtd/spring-beans-2.0.dtd">
<!-- Defines the pooled JDBC data source bean articleDataSource. -->
<beans>

<!-- Commons DBCP pool against a local MySQL database; close() is called when the bean is destroyed. -->
<!-- NOTE(review): the user name and password are hard-coded below; consider moving them out of this file. -->
<bean id="articleDataSource" class="org.apache.commons.dbcp.BasicDataSource" destroy-method="close">
	<property name="driverClassName" value="com.mysql.jdbc.Driver" ></property>
	<!-- The URL asks the driver to use UTF-8 for character data. -->
	<property name="url" value="jdbc:mysql://localhost/companyweb?useUnicode=true&amp;characterEncoding=utf-8" />
	<property name="username" value="root" />
	<property name="password" value="jushi" />
	<!-- Pool sizing. -->
	<property name="initialSize" value="5" />
	<property name="maxActive" value="20" />
	<property name="maxIdle" value="20" />
	<property name="minIdle" value="5" />
</bean> 

</beans> 
 
   8. action配置 struts-config.xml
 
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE struts-config PUBLIC "-//Apache Software Foundation//DTD Struts Configuration 1.2//EN" "http://struts.apache.org/dtds/struts-config_1_2.dtd">

<!-- Struts configuration: one lazy form bean and the /index action. -->
<struts-config>

	<form-beans>
	   <form-bean name="lazyForm" type="org.apache.struts.validator.LazyValidatorForm"/>
    </form-beans>
    <action-mappings>
    
    	<!-- /index is handled by IndexAction; parameter="m" names the request parameter associated with this mapping, and validation is off. -->
    	<action path="/index"
	            type="com.jushi.lucene.businessview.IndexAction"
	            name="lazyForm" parameter="m" scope="request" validate="false">
				<!-- The "result" forward renders /result.jsp. -->
				<forward name="result" path="/result.jsp"></forward>
		</action>

	</action-mappings>
</struts-config>
 
    9. web.xml 文件配置: 监听、加载配置文件
 
 
<?xml version="1.0" encoding="UTF-8"?>
<!-- Deployment descriptor: Spring context loading, request encoding filter and the Struts servlet. -->
<web-app version="2.5" 
	xmlns="http://java.sun.com/xml/ns/javaee" 
	xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" 
	xsi:schemaLocation="http://java.sun.com/xml/ns/javaee 
	http://java.sun.com/xml/ns/javaee/web-app_2_5.xsd">
	
	<!-- Location pattern of the Spring configuration files: every applicationContext*.xml under springConfig on the classpath. -->
	<context-param>
		<param-name>contextConfigLocation</param-name>
		<param-value>classpath*:/springConfig/applicationContext*.xml</param-value>
	</context-param>
	
	<!--  Character Encoding filter -->
	<!-- Applied to every URL with encoding UTF-8. -->
	<filter>
          <filter-name>encodingFilter</filter-name>
          <filter-class>org.springframework.web.filter.CharacterEncodingFilter</filter-class>
          <init-param>
                   <param-name>encoding</param-name>
                   <param-value>UTF-8</param-value>
          </init-param>
     </filter>
     <filter-mapping>
          <filter-name>encodingFilter</filter-name>
          <url-pattern>/*</url-pattern>
     </filter-mapping>
     
     <!-- Spring's context loader listener. -->
     <listener>
		<listener-class>org.springframework.web.context.ContextLoaderListener</listener-class>
	</listener>
	
	<!-- Struts action servlet, configured from /WEB-INF/struts-config.xml. -->
	<servlet>
		<servlet-name>action</servlet-name>
		<servlet-class>org.apache.struts.action.ActionServlet</servlet-class>
		<init-param>
			<param-name>config</param-name>
			<param-value>/WEB-INF/struts-config.xml</param-value>
		</init-param>
	</servlet>

	<!-- Requests ending in .html are routed to the Struts servlet. -->
	<servlet-mapping>
		<servlet-name>action</servlet-name>
		<url-pattern>*.html</url-pattern>
	</servlet-mapping>
	
  <welcome-file-list>
    <welcome-file>index.jsp</welcome-file>
  </welcome-file-list>
</web-app>
 
   10. 前端 View 两个jsp index.jsp和 result.jsp
 
     index.jsp
<%@ page language="java" import="java.util.*" pageEncoding="utf-8"%>
<%
String path = request.getContextPath();
String basePath = request.getScheme()+"://"+request.getServerName()+":"+request.getServerPort()+path+"/";
%>

<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
  <head>
    <base href="<%=basePath%>">
    
    <title>简单的lucene测试</title>
    <script type="text/javascript" src="js/CheckFunction.js"></script>
	<script type="text/javascript">
		/**
		 * Validates the keyword field of form1 and submits the form to the
		 * index action. Shows an alert and returns false when the trimmed
		 * keyword is empty; otherwise submits and returns nothing.
		 */
		function _search()
		{
			var searchForm = document.form1;
			var keyword = Trim(searchForm.title.value);

			// Refuse to search without a keyword.
			if(keyword == '')
			{
				alert("输入一个关键字吧!");
				return false;
			}

			// m=index is sent along as the action's "m" parameter.
			searchForm.action = 'index.html?m=index';
			searchForm.submit();
		}
	</script>
  </head>
  
  <body>
    	
    	<center><h1>测试Lucene</h1>
    	<hr/>
    	<form name="form1" action="index.html" method="post">
    	请输入文章的标题关键字:<input type="text" maxlength="20" name="title" size="40"/>&nbsp;
    	<button onclick="javascript:_search();return false;" style="border: thin;border-color: blue" name="search"> GO </button>
    	</form>
    	</center>
  </body>
</html>
 
    result.jsp
 
<%@ page language="java" import="java.util.*" pageEncoding="utf-8"%>
<%@ page import="com.jushi.lucene.entities.*"%>
<%
String path = request.getContextPath();
String basePath = request.getScheme()+"://"+request.getServerName()+":"+request.getServerPort()+path+"/";
%>
<%
	// Search results placed on the request under "articlelist"; the attribute
	// may be absent or null, so fall back to an empty list before any use.
	List<Article> list = (List<Article>) request.getAttribute("articlelist");
	if(list == null)
		list = Collections.<Article>emptyList();
	// Defaults shown when no timing or hit information is available.
	String TotalTime = "0.000";
	int TotalHits = 0;
	if(request.getAttribute("totalTime")!=null)
		TotalTime = request.getAttribute("totalTime").toString();
	// The hit count is read from the first result row.
	if(!list.isEmpty())
		TotalHits = list.get(0).getTotalHits(); 
%>
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
  <head>
    <base href="<%=basePath%>">
   	<title>搜索的结果如下:</title>
   	<style type="text/css">
   		.Fee {
   			width: 650px;
   			height:28px;
   			float: right;
   			padding:1px;
   			overflow:hidden;
   			border-bottom:1px solid #bde2ff;
   			line-height:28px;
   			font-family: '宋体';
   			font-size: 15px;
   			font-weight: bold;
   		}
   		.Fee span {
   			color: red;
   			font-size:14px;
   		}
   		
   	</style>
  </head>
  
  <body>
  	<div class="Fee">共搜到 <span><%=TotalHits %></span> 条结果. 共花费:<span><%=TotalTime %> </span>秒.</div><br/><br/>
  	
    <table width="80%" height="70%" border="1" cellpadding="0" cellspacing="0" align="center">
    		<tr height="20" bgcolor="#CCCCCC">
    		<td align="center" width="20%">标题</td>
    		<td align="center" width="60%">文章内容</td>
    		<td align="center" width="20%">所属标签</td>
    		</tr>
    		<%
    		if(list.size()>0){
    		for(Article art : list){
    			String title = art.getTitle();
    			String content = art.getContent();
    			String tag = art.getTag();
    		%>
    		<tr>
    			<td><%=title %></td>
    			<td><%=content %></td>
    			<td><%=tag %></td>
    		</tr>
    		<%}}else{ %>
    		<tr>
    			<td colspan="3" align="center"><h3>对不起...没有搜到相关信息..请尝试其他关键字!</h3></td>
    		</tr>
    		<%} %>
    	</table>
    	<br/>
    	<center><a href="<%=basePath %>./">返回上一级</a></center>
  </body>
</html>
 
    11. action:IndexAction.java
 
package com.jushi.lucene.businessview;

import java.text.DecimalFormat;
import java.util.Date;
import java.util.List;

import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;

import org.apache.struts.action.ActionForm;
import org.apache.struts.action.ActionForward;
import org.apache.struts.action.ActionMapping;
import org.apache.struts.actions.DispatchAction;
import org.springframework.web.context.WebApplicationContext;
import org.springframework.web.context.support.WebApplicationContextUtils;

import com.jushi.lucene.business.IArticleService;
import com.jushi.lucene.entities.Article;

/**
 * @Author fenglong
 * @CreateDate Jun 4, 2010 1:50:02 PM
 * @Version V 1.0
 * @CopyRight 2010 jushi
 */
/**
 * Struts dispatch action behind the search form: makes sure the index
 * exists, runs the search and forwards to the result page.
 */
public class IndexAction extends DispatchAction {

	/**
	 * Handles a search request.
	 *
	 * <p>Reads the keyword from the {@code title} request parameter, asks the
	 * service to create the index, and searches when that succeeds. The
	 * {@code articlelist} request attribute is always set (to an empty list
	 * when the index is unavailable or the search returns nothing usable), so
	 * the result page can iterate it safely. {@code totalTime} is set only
	 * when a search was actually run.
	 *
	 * @param mapping  the action mapping used to resolve the forward
	 * @param form     the form bean (unused)
	 * @param request  the current request; receives the result attributes
	 * @param response the current response (unused)
	 * @return the {@code result} forward
	 * @throws Exception if resolving the service or running the search fails
	 */
	public ActionForward index(ActionMapping mapping, ActionForm form,
			HttpServletRequest request, HttpServletResponse response)
			throws Exception {

		WebApplicationContext wac = WebApplicationContextUtils.getRequiredWebApplicationContext(this.getServlet().getServletContext());
		IArticleService articleService = (IArticleService) wac.getBean("articleService");

		String q = request.getParameter("title");
		DecimalFormat df = new DecimalFormat("#0.000");

		List<Article> list = null;

		if (articleService.createIndex()) {

			long begin = System.currentTimeMillis();
			list = articleService.getArticles(q);
			long end = System.currentTimeMillis();

			// Elapsed search time in seconds, formatted with three decimals.
			double time = (double) (end - begin) / 1000;
			request.setAttribute("totalTime", df.format(time));
		}

		// The view dereferences the list unconditionally; never hand it null.
		if (list == null) {
			list = java.util.Collections.<Article>emptyList();
		}
		request.setAttribute("articlelist", list);

		return mapping.findForward("result");
	}

}
分享到:
评论

相关推荐

    lucene学习资料收集

    【标题】:“Lucene学习资料收集” 【描述】:Lucene是一个开源的全文搜索引擎库,由Apache软件基金会开发。这个资料集可能包含了关于如何理解和使用Lucene的各种资源,特别是通过博主huanglz19871030在iteye上的...

    lucene学习

    Lucene的基础知识 1、案例分析:什么是全文检索,如何实现全文检索 2、Lucene实现全文检索的流程 a) 创建索引 b) 查询索引 3、配置开发环境 4、创建索引库 5、查询索引库 6、分析器的分析过程 a) 测试分析器的分词...

    lucene学习资料

    《Lucene学习资料》 Lucene是一个开源的全文搜索引擎库,由Apache软件基金会维护。它提供了高级的文本分析和索引功能,使得开发者能够轻松地在应用程序中集成强大的搜索功能。这个资料包中的《Lucene in Action_2nd...

    Lucene学习源码.rar

    本文将主要围绕Java Lucene进行深入探讨,并基于提供的“Lucene学习源码.rar”文件中的“Lucene视频教程_讲解部分源码”展开讨论。 一、Lucene核心概念 1. 文档(Document):Lucene中的基本单位,用于存储待检索...

    lucene学习pdf2

    "lucene学习pdf2" 提供的文档,无疑是对Lucene深入理解的一把钥匙,它涵盖了Lucene的核心概念、操作流程以及高级特性。 首先,Lucene的基础知识是必不可少的。Lucene的核心在于索引和搜索,它将非结构化的文本数据...

    lucene学习-02

    【标题】:“Lucene学习-02” 在深入探讨“Lucene学习-02”这一主题之前,我们先来理解一下Lucene的核心概念。Lucene是一个高性能、全文本搜索库,由Apache软件基金会开发,广泛应用于各种搜索引擎和信息检索系统。...

    Lucene的的学习资料及案例

    **Lucene学习指南** Lucene是一个高性能、全文检索库,由Apache软件基金会开发并维护,是Java编程语言中广泛使用的搜索引擎库。它提供了一个简单的API,使得开发者能够方便地在应用中实现全文检索功能。本篇文章将...

    【大搜集:lucene学习资料】---<下载不扣分,回帖加1分,欢迎下载,童叟无欺>

    lucene学习笔记 1 .txt lucene学习笔记 2.txt lucene学习笔记 3 .txt lucene入门实战.txt Lucene 的学习 .txt Lucene-2.0学习文档 .txt Lucene入门与使用 .txt lucene性能.txt 大富翁全文索引和查询的例子...

    Lucene学习工具包.zip

    **Lucene学习工具包** Lucene是一个开源的全文搜索引擎库,由Apache软件基金会开发并维护。这个"Lucene学习工具包.zip"包含了学习Lucene所需的重要资料和资源,旨在帮助开发者深入理解和掌握Lucene的核心概念、功能...

    Lucene学习例子与文档

    **Lucene学习例子与文档详解** Lucene是一个高性能、全文本搜索库,由Apache软件基金会开发,它提供了完整的搜索功能,包括索引、查询、排序等。Lucene被广泛应用于各种需要全文检索的项目中,如网站、文档管理、...

    lucene学习入门程序

    **Lucene学习入门程序** Lucene是一个开源的全文搜索引擎库,由Apache软件基金会开发并维护。它是Java编写,可以被集成到各种应用中,提供强大的文本检索功能。本程序是针对初学者设计的,旨在帮助开发者快速理解并...

    lucene学习全方面剖析总结

    ### Lucene 学习全方面剖析总结 #### Lucene 原理与应用概述 Lucene 是一个高性能、全文检索的开源库,被广泛应用于各种搜索引擎的开发之中。本篇文章旨在全面剖析 Lucene 的核心技术和应用场景,帮助读者深入理解...

    Lucene 学习笔记 1

    **Lucene 学习笔记 1** Lucene 是一个全文搜索引擎库,由 Apache 软件基金会开发。它提供了一个可扩展的、高性能的搜索框架,使得开发者能够在其应用程序中集成高级的搜索功能。本篇学习笔记将深入探讨 Lucene 的...

    lucene学习笔记

    标题:Lucene学习笔记 描述:Lucene学习笔记,Lucene入门必备材料 知识点: 一、Lucene概述与文档管理策略 Lucene是一款高性能、全功能的文本搜索引擎库,广泛应用于文档检索、全文搜索等场景。为了提升搜索效率...

Global site tag (gtag.js) - Google Analytics