- 浏览: 138523 次
- 性别:
- 来自: 重庆
文章分类
最新评论
-
酱油男js:
不错...学习了。
oracle sql 优化大全 -
浪子秋水:
网页编码就是那点事 -
shenghuorulan:
通俗易懂。好文章,直接想到的问题都提到了。
oracle 全文检索实践 -
飞儿9530:
lucene 3.0.1 学习
1. T_ARTICLE 表 DROP TABLE IF EXISTS `t_article`; CREATE TABLE `t_article` ( `ID` varchar(32) NOT NULL default '', `ARTICLE_TITLE` varchar(255) default NULL, `ARTICLE_TAG` varchar(255) default NULL, `ARTICLE_CONTENT` text, PRIMARY KEY (`ID`) ) ENGINE=InnoDB DEFAULT CHARSET=utf8; 上面的数据 随便加一下吧,主要是article_content和article_title字段的数据,article_content是个text字段,所以你可以粘贴网页的文字内容添加到本字段中。 2.业务接口 ArticleService.java package com.jushi.lucene.business; import java.util.List; import com.jushi.lucene.entities.Article; /** * @Author jushi * @CreateDate Jun 4, 2010 1:24:42 PM * @Version V 1.0 */ public interface IArticleService { public List<Article> getArticles(String query); public boolean createIndex(); } 3. 接口的实现 ArticleServiceImpl.java 主要的业务在这里实现:创建索引,搜索数据处理。 package com.jushi.lucene.business; import java.io.File; import java.io.StringReader; import java.util.ArrayList; import java.util.Date; import java.util.List; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.queryParser.MultiFieldQueryParser; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.Filter; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.highlight.Highlighter; import org.apache.lucene.search.highlight.QueryScorer; import org.apache.lucene.search.highlight.SimpleFragmenter; import org.apache.lucene.search.highlight.SimpleHTMLFormatter; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import com.jushi.lucene.dao.IArticleDAO; import com.jushi.lucene.entities.Article; /** * @Author jushi * @CreateDate Jun 4, 2010 1:25:00 PM * @Version V 1.0 */ public 
class ArticleServiceImpl implements IArticleService { private IArticleDAO articleDAO; private final String INDEXPATH = "g:\\index"; private Analyzer analyzer = new StandardAnalyzer(); public List<Article> getArticles(String query) { try{ List<Article> qlist = new ArrayList<Article>(); String fieldName = "title"; IndexSearcher indexSearcher = new IndexSearcher(INDEXPATH); //QueryParser parser = new QueryParser(fieldName, analyzer); //单 key 搜索 //Query queryOBJ = parser.parse(query); System.out.println(">>> 2.开始读取索引... ... 通过关键字:【 "+ query +" 】"); long begin = new Date().getTime(); //下面的是进行title,content 两个范围内进行收索. BooleanClause.Occur[] clauses = { BooleanClause.Occur.SHOULD,BooleanClause.Occur.SHOULD }; Query queryOBJ = MultiFieldQueryParser.parse(query, new String[]{"title","content"}, clauses, new StandardAnalyzer());//parser.parse(query); Filter filter = null; //################# 搜索相似度最高的记录 ################### TopDocs topDocs = indexSearcher.search(queryOBJ, filter, 1000); //TopDocs topDocs = indexSearcher.search(queryOBJ , 10000); System.out.println("*** 共匹配:" + topDocs.totalHits + "个 ***"); Article article = null; //输出结果 for (ScoreDoc scoreDoc : topDocs.scoreDocs){ Document targetDoc = indexSearcher.doc(scoreDoc.doc); article = new Article(); //设置高亮显示格式 SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter("<font color='red'><strong>", "</strong></font>"); /* 语法高亮显示设置 */ Highlighter highlighter = new Highlighter(simpleHTMLFormatter,new QueryScorer(queryOBJ)); highlighter.setTextFragmenter(new SimpleFragmenter(100)); // 设置高亮 设置 title,content 字段 String title = targetDoc.get("title"); String content = targetDoc.get("content"); TokenStream titleTokenStream = analyzer.tokenStream(fieldName,new StringReader(title)); TokenStream contentTokenStream = analyzer.tokenStream("content",new StringReader(content)); String highLightTitle = highlighter.getBestFragment(titleTokenStream, title); String highLightContent = highlighter.getBestFragment(contentTokenStream, 
content); if(highLightTitle == null) highLightTitle = title; if(highLightContent == null) highLightContent = content; article.setTitle(highLightTitle); article.setContent(highLightContent); article.setTag(targetDoc.get("tag")); article.setTotalHits(topDocs.totalHits); qlist.add(article); } long end = new Date().getTime(); System.out.println(">>> 3.搜索完毕... ... 共花费:" + (end - begin) +"毫秒..."); indexSearcher.close(); return qlist; }catch(Exception e){ e.printStackTrace(); return null; } } public boolean createIndex() { //检查索引是否存在 if(this.isIndexExisted()) return this.isIndexExisted(); List<Article> list = articleDAO.getArticles(); try { Directory directory = FSDirectory.getDirectory(INDEXPATH); IndexWriter indexWriter = new IndexWriter(directory, analyzer ,true, IndexWriter.MaxFieldLength.LIMITED); long begin = new Date().getTime(); for(Article art: list) { Document doc = new Document(); String title = art.getTitle() == null ? "" : art.getTitle().trim(); String content = art.getContent() == null ? "" : art.getContent(); String tag = art.getTag() == null ? "" : art.getTag(); doc.add(new Field("title", title, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.YES)); doc.add(new Field("content", content, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.YES)); doc.add(new Field("tag", tag, Field.Store.COMPRESS,Field.Index.NO)); indexWriter.addDocument(doc); } long end = new Date().getTime(); System.out.println(">>> 1.存入索引完毕.. 共花费:" + (end - begin) +"毫秒..."); indexWriter.optimize(); indexWriter.close(); return true; }catch(Exception e){ e.printStackTrace(); return false; } } /** * check Index is Existed * @return true or false */ private boolean isIndexExisted() { try { File dir = new File(INDEXPATH); if(dir.listFiles().length>0) return true; else return false; }catch(Exception e){ e.printStackTrace(); return false; } } public void setArticleDAO(IArticleDAO articleDAO) { this.articleDAO = articleDAO; } } 4. 
DAO 接口及 DAO实现
/** Data-access interface for reading articles. */
public interface IArticleDAO {

    /**
     * @return the articles read from the data store
     */
    public List<Article> getArticles();
}

/** JDBC implementation of {@link IArticleDAO} reading from t_article. */
public class ArticleDAOImpl extends JdbcDaoSupport implements IArticleDAO{

    /**
     * Loads title, tag and content of every row in t_article.
     *
     * @return one Article per row, mapped by ArticleRowMapper
     */
    // The query API returns a raw List; ArticleRowMapper only ever produces
    // Article instances, so the cast is safe.
    @SuppressWarnings("unchecked")
    public List<Article> getArticles() {
        String sql = "SELECT article_title, article_tag, article_content FROM t_article";
        return (List<Article>)this.getJdbcTemplate().query(sql, new ArticleRowMapper());
    }
}
5. 上面DAO需要的 ArticleRowMapper: 这里做了下小改动
package com.jushi.lucene.rowmapper;

import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.sql.SQLException;

import org.springframework.jdbc.core.RowMapper;

import com.jushi.lucene.entities.Article;

/**
 * Maps one result-set row onto an {@link Article}, matching columns by name
 * so the mapper works for any subset or ordering of the article columns.
 *
 * @Author jushi
 * @CreateDate Jun 4, 2010 1:31:54 PM
 * @Version V 1.0
 */
public class ArticleRowMapper implements RowMapper {

    /**
     * Copies the article_title, article_content and article_tag columns, when
     * present, into a new Article; other columns are ignored.
     *
     * @param rs the result set positioned on the row to map
     * @param rowNum the index of the current row (unused)
     * @return the populated Article
     * @throws SQLException if reading the metadata or a column value fails
     */
    public Object mapRow(ResultSet rs, int rowNum) throws SQLException {
        Article article = new Article();
        ResultSetMetaData meta = rs.getMetaData();
        int colNum = meta.getColumnCount();
        for (int i = 1; i <= colNum; i++) {
            String colName = meta.getColumnName(i);
            // equalsIgnoreCase is locale-independent, unlike toLowerCase(),
            // which mangles 'I' under e.g. a Turkish default locale.
            if ("article_title".equalsIgnoreCase(colName)) {
                article.setTitle(rs.getString(i));
            } else if ("article_content".equalsIgnoreCase(colName)) {
                article.setContent(rs.getString(i));
            } else if ("article_tag".equalsIgnoreCase(colName)) {
                article.setTag(rs.getString(i));
                //...
            }
        }
        return article;
    }
}
6. 
Article 实体 添加了两个属性: TotalHits, TotalTime package com.jushi.lucene.entities; /** * @Author jushi * @CreateDate Jun 4, 2010 1:18:48 PM * @Version V 1.0 */ public class Article { private String title; private String content; private String tag; private int TotalHits; //命中率 private int TotalTime; //花费时间 public int getTotalTime() { return TotalTime; } public void setTotalTime(int totalTime) { TotalTime = totalTime; } public String getTitle() { return title; } public void setTitle(String title) { this.title = title; } public String getContent() { return content; } public void setContent(String content) { this.content = content; } public String getTag() { return tag; } public void setTag(String tag) { this.tag = tag; } public int getTotalHits() { return TotalHits; } public void setTotalHits(int totalHits) { TotalHits = totalHits; } } 7. Spring 的配置如下: --- applicationContext-lucene.xml <?xml version="1.0" encoding="UTF-8"?> <!DOCTYPE beans PUBLIC "-//SPRING//DTD BEAN 2.0//EN" "http://www.springframework.org/dtd/spring-beans-2.0.dtd"> <beans> <bean id="articleDAO" class="com.jushi.lucene.dao.ArticleDAOImpl" scope="singleton" > <property name="dataSource" ref="articleDataSource"></property> </bean> <bean id="articleService" class="com.jushi.lucene.business.ArticleServiceImpl" scope="singleton" > <property name="articleDAO" ref="articleDAO"></property> </bean> </beans> --- applicationContext-lucene-datasource.xml <?xml version="1.0" encoding="UTF-8"?> <!DOCTYPE beans PUBLIC "-//SPRING//DTD BEAN 2.0//EN" "http://www.springframework.org/dtd/spring-beans-2.0.dtd"> <beans> <bean id="articleDataSource" class="org.apache.commons.dbcp.BasicDataSource" destroy-method="close"> <property name="driverClassName" value="com.mysql.jdbc.Driver" ></property> <property name="url" value="jdbc:mysql://localhost/companyweb?useUnicode=true&characterEncoding=utf-8" /> <property name="username" value="root" /> <property name="password" value="jushi" /> <property name="initialSize" value="5" /> 
        <!-- Remaining sizing properties of the articleDataSource bean -->
        <property name="maxActive" value="20" />
        <property name="maxIdle" value="20" />
        <property name="minIdle" value="5" />
    </bean>
</beans>
8. action配置 struts-config.xml
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE struts-config PUBLIC "-//Apache Software Foundation//DTD Struts Configuration 1.2//EN" "http://struts.apache.org/dtds/struts-config_1_2.dtd">
<struts-config>
    <form-beans>
        <form-bean name="lazyForm" type="org.apache.struts.validator.LazyValidatorForm"/>
    </form-beans>
    <action-mappings>
        <!-- parameter="m": the request parameter "m" carries the method name;
             index.jsp submits to index.html?m=index -->
        <action path="/index" type="com.jushi.lucene.businessview.IndexAction" name="lazyForm" parameter="m" scope="request" validate="false">
            <forward name="result" path="/result.jsp"></forward>
        </action>
    </action-mappings>
</struts-config>
9. web.xml 文件配置: 监听、加载配置文件
<?xml version="1.0" encoding="UTF-8"?>
<web-app version="2.5" xmlns="http://java.sun.com/xml/ns/javaee" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://java.sun.com/xml/ns/javaee http://java.sun.com/xml/ns/javaee/web-app_2_5.xsd">
    <!-- Location pattern of the Spring applicationContext*.xml files -->
    <context-param>
        <param-name>contextConfigLocation</param-name>
        <param-value>classpath*:/springConfig/applicationContext*.xml</param-value>
    </context-param>
    <!-- Character Encoding filter -->
    <filter>
        <filter-name>encodingFilter</filter-name>
        <filter-class>org.springframework.web.filter.CharacterEncodingFilter</filter-class>
        <init-param>
            <param-name>encoding</param-name>
            <param-value>UTF-8</param-value>
        </init-param>
    </filter>
    <filter-mapping>
        <filter-name>encodingFilter</filter-name>
        <url-pattern>/*</url-pattern>
    </filter-mapping>
    <!-- Spring context loader listener -->
    <listener>
        <listener-class>org.springframework.web.context.ContextLoaderListener</listener-class>
    </listener>
    <!-- Struts action servlet, configured from /WEB-INF/struts-config.xml -->
    <servlet>
        <servlet-name>action</servlet-name>
        <servlet-class>org.apache.struts.action.ActionServlet</servlet-class>
        <init-param>
            <param-name>config</param-name>
            <param-value>/WEB-INF/struts-config.xml</param-value>
        </init-param>
    </servlet>
    <!-- Requests ending in .html are routed to the action servlet -->
    <servlet-mapping>
        <servlet-name>action</servlet-name>
        <url-pattern>*.html</url-pattern>
</servlet-mapping> <welcome-file-list> <welcome-file>index.jsp</welcome-file> </welcome-file-list> </web-app> 10. 前端 View 两个jsp index.jsp和 result.jsp index.jsp <%@ page language="java" import="java.util.*" pageEncoding="utf-8"%> <% String path = request.getContextPath(); String basePath = request.getScheme()+"://"+request.getServerName()+":"+request.getServerPort()+path+"/"; %> <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"> <html> <head> <base href="<%=basePath%>"> <title>简单的lucene测试</title> <script type="text/javascript" src="js/CheckFunction.js"></script> <script type="text/javascript"> function _search() { var form = document.form1; if(Trim(form.title.value) == '') { alert("输入一个关键字吧!"); return false; } form.action = 'index.html?m=index'; form.submit(); } </script> </head> <body> <center><h1>测试Lucene</h1> <hr/> <form name="form1" action="index.html" method="post"> 请输入文章的标题关键字:<input type="text" maxlength="20" name="title" size="40"/> <button onclick="javascript:_search();return false;" style="border: thin;border-color: blue" name="search"> GO </button> </form> </center> </body> </html> result.jsp <%@ page language="java" import="java.util.*" pageEncoding="utf-8"%> <%@ page import="com.jushi.lucene.entities.*"%> <% String path = request.getContextPath(); String basePath = request.getScheme()+"://"+request.getServerName()+":"+request.getServerPort()+path+"/"; %> <% List<Article> list = (List<Article>) request.getAttribute("articlelist"); String TotalTime = "0.000"; int TotalHits = 0; if(request.getAttribute("totalTime")!=null) TotalTime = request.getAttribute("totalTime").toString(); if(list.size()>0 && list!=null) TotalHits = list.get(0).getTotalHits(); %> <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"> <html> <head> <base href="<%=basePath%>"> <title>搜索的结果如下:</title> <style type="text/css"> .Fee { width: 650px; height:28px; float: right; padding:1px; overflow:hidden; border-bottom:1px solid #bde2ff; line-height:28px; font-family: 
'宋体'; font-size: 15px; font-weight: bold; } .Fee span { color: red; font-size:14px; } </style> </head> <body> <div class="Fee">共搜到 <span><%=TotalHits %></span> 条结果. 共花费:<span><%=TotalTime %> </span>秒.</div><br/><br/> <table width="80%" height="70%" border="1" cellpadding="0" cellspacing="0" align="center"> <tr height="20" bgcolor="#CCCCCC"> <td align="center" width="20%">标题</td> <td align="center" width="60%">文章内容</td> <td align="center" width="20%">所属标签</td> </tr> <% if(list.size()>0){ for(Article art : list){ String title = art.getTitle(); String content = art.getContent(); String tag = art.getTag(); %> <tr> <td><%=title %></td> <td><%=content %></td> <td><%=tag %></td> </tr> <%}}else{ %> <tr> <td colspan="3" align="center"><h3>对不起...没有搜到相关信息..请尝试其他关键字!</h3></td> </tr> <%} %> </table> <br/> <center><a href="<%=basePath %>./">返回上一级</a></center> </body> </html> 12. action:IndexAction.java package com.jushi.lucene.businessview; import java.text.DecimalFormat; import java.util.Date; import java.util.List; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; import org.apache.struts.action.ActionForm; import org.apache.struts.action.ActionForward; import org.apache.struts.action.ActionMapping; import org.apache.struts.actions.DispatchAction; import org.springframework.web.context.WebApplicationContext; import org.springframework.web.context.support.WebApplicationContextUtils; import com.jushi.lucene.business.IArticleService; import com.jushi.lucene.entities.Article; /** * @Author fenglong * @CreateDate Jun 4, 2010 1:50:02 PM * @Version V 1.0 * @CopyRight 2010 jushi */ public class IndexAction extends DispatchAction { public ActionForward index(ActionMapping mapping, ActionForm form, HttpServletRequest request, HttpServletResponse response) throws Exception { WebApplicationContext wac = WebApplicationContextUtils.getRequiredWebApplicationContext(this.getServlet().getServletContext()); IArticleService articleService = 
(IArticleService) wac.getBean("articleService"); String q = request.getParameter("title"); DecimalFormat df = new DecimalFormat("#0.000"); if(articleService.createIndex()){ long begin = new Date().getTime(); List<Article> list = articleService.getArticles(q); long end = new Date().getTime(); double time = (double) (end - begin) / 1000; String timeStr = df.format(time); request.setAttribute("totalTime", timeStr); request.setAttribute("articlelist", list); } return mapping.findForward("result"); } }
发表评论
-
linux重启oracle 各种方法
2013-01-15 09:53 1320来自 : http://blog.sina.com.cn/s/ ... -
SVN服务器配置
2013-01-10 22:49 1257来自:http://air-blue55.blog.163.c ... -
网页编码就是那点事
2012-11-24 16:10 1044来自 : http://www.kuqin.com/webpa ... -
Tomcat 配置 CAS 的一个问题。unable to find valid certification path to requested target
2012-11-11 21:40 1648来自: http://blog.csdn.net/cool_ ... -
Tomcat SSL配置大全
2012-11-11 21:02 1245来源: http://www.iteye.com/topi ... -
Html中td自动换行问题
2012-10-25 17:57 1099来自:http://blog.csdn.net/dududu0 ... -
oracle 全角、半角转换,和oracle 的 DUMP函数
2012-06-18 09:56 2192to_single_byte(c)转换成半角 to_multi ... -
tomcat sso 配置
2012-05-29 23:18 1670源: http://www.oecp.cn/hi/single ... -
Hibernate 利用配置文件配置HQL/SQL
2012-05-21 17:14 1846Query.hbm.xml <?xml version= ... -
CAS工作流程
2012-05-19 22:25 1616CAS3.0的工作流程: 0.app将用户转发到CAS处, 并 ... -
jforum 表目录
2012-04-18 18:37 3047目 录 1、附件关系表JFORUM_ATTACH 2 2、附件 ... -
JForum集成cas单点登录
2012-04-17 21:35 16181.安装CAS 下载cas server ,设置好服务器端, ... -
JForum登录用例
2012-04-15 22:59 61671,键入用户名、密码,单击“登入”按钮,登录请求发送到/JFo ... -
tinyMCE 详细文档
2012-03-28 10:57 2164http://www.iwms.net/n2065c17.as ... -
JBPM3.1.4 相关表的 意义
2012-03-18 22:54 1036JBPM_ACTION action记录表 JBPM_DEC ... -
JBPM环境配置
2012-03-18 18:56 1864jBPM 需要数据库支持, j ... -
xfire 编写webservice例子
2012-02-23 16:51 1092来自 : http://www.blogjava.net/j ... -
ArrayUtils常用的用法
2012-02-21 10:51 1608// 1.打印数组 ArrayUtils.toString(n ... -
Tomcat自动缓存context.xml,导致在eclipse中修改后,不能立即起作用
2012-02-18 11:56 1809http://blog.csdn.net/shiqidide/ ... -
JavaScript CSS Style属性对照表
2012-02-11 22:36 1140CSS语法 (不区分大小写) JavaScript语法 (区分 ...
相关推荐
【标题】:“Lucene学习资料收集” 【描述】:Lucene是一个开源的全文搜索引擎库,由Apache软件基金会开发。这个资料集可能包含了关于如何理解和使用Lucene的各种资源,特别是通过博主huanglz19871030在iteye上的...
Lucene的基础知识 1、案例分析:什么是全文检索,如何实现全文检索 2、Lucene实现全文检索的流程 a) 创建索引 b) 查询索引 3、配置开发环境 4、创建索引库 5、查询索引库 6、分析器的分析过程 a) 测试分析器的分词...
《Lucene学习资料》 Lucene是一个开源的全文搜索引擎库,由Apache软件基金会维护。它提供了高级的文本分析和索引功能,使得开发者能够轻松地在应用程序中集成强大的搜索功能。这个资料包中的《Lucene in Action_2nd...
本文将主要围绕Java Lucene进行深入探讨,并基于提供的“Lucene学习源码.rar”文件中的“Lucene视频教程_讲解部分源码”展开讨论。 一、Lucene核心概念 1. 文档(Document):Lucene中的基本单位,用于存储待检索...
"lucene学习pdf2" 提供的文档,无疑是对Lucene深入理解的一把钥匙,它涵盖了Lucene的核心概念、操作流程以及高级特性。 首先,Lucene的基础知识是必不可少的。Lucene的核心在于索引和搜索,它将非结构化的文本数据...
【标题】:“Lucene学习-02” 在深入探讨“Lucene学习-02”这一主题之前,我们先来理解一下Lucene的核心概念。Lucene是一个高性能、全文本搜索库,由Apache软件基金会开发,广泛应用于各种搜索引擎和信息检索系统。...
**Lucene学习指南** Lucene是一个高性能、全文检索库,由Apache软件基金会开发并维护,是Java编程语言中广泛使用的搜索引擎库。它提供了一个简单的API,使得开发者能够方便地在应用中实现全文检索功能。本篇文章将...
lucene学习笔记 1 .txt lucene学习笔记 2.txt lucene学习笔记 3 .txt lucene入门实战.txt Lucene 的学习 .txt Lucene-2.0学习文档 .txt Lucene入门与使用 .txt lucene性能.txt 大富翁全文索引和查询的例子...
**Lucene学习工具包** Lucene是一个开源的全文搜索引擎库,由Apache软件基金会开发并维护。这个"Lucene学习工具包.zip"包含了学习Lucene所需的重要资料和资源,旨在帮助开发者深入理解和掌握Lucene的核心概念、功能...
**Lucene学习例子与文档详解** Lucene是一个高性能、全文本搜索库,由Apache软件基金会开发,它提供了完整的搜索功能,包括索引、查询、排序等。Lucene被广泛应用于各种需要全文检索的项目中,如网站、文档管理、...
**Lucene学习入门程序** Lucene是一个开源的全文搜索引擎库,由Apache软件基金会开发并维护。它是Java编写,可以被集成到各种应用中,提供强大的文本检索功能。本程序是针对初学者设计的,旨在帮助开发者快速理解并...
### Lucene 学习全方面剖析总结 #### Lucene 原理与应用概述 Lucene 是一个高性能、全文检索的开源库,被广泛应用于各种搜索引擎的开发之中。本篇文章旨在全面剖析 Lucene 的核心技术和应用场景,帮助读者深入理解...
**Lucene 学习笔记 1** Lucene 是一个全文搜索引擎库,由 Apache 软件基金会开发。它提供了一个可扩展的、高性能的搜索框架,使得开发者能够在其应用程序中集成高级的搜索功能。本篇学习笔记将深入探讨 Lucene 的...
标题:Lucene学习笔记 描述:Lucene学习笔记,Lucene入门必备材料 知识点: 一、Lucene概述与文档管理策略 Lucene是一款高性能、全功能的文本搜索引擎库,广泛应用于文档检索、全文搜索等场景。为了提升搜索效率...