锁定老帖子 主题:Lucene与DB结合示例
精华帖 (0) :: 良好帖 (0) :: 新手帖 (12) :: 隐藏帖 (0)
|
|
---|---|
作者 | 正文 |
发表时间:2010-06-17
最后修改:2010-06-19
--- 2010-06-18 17:00 代码修改: 修改重复创建索引,添加双字段高亮。
今天发一个简单的和数据库交互的Lucene示例,只是初步地靠Lucene自带的分词器实现中文分词,效果肯定无法与网上琳琅满目的分词器相媲美,只是为了示例,高亮也用了自带的。页面不好看,将就一下哦。 主要是由 Spring + Struts1 + Mysql 5 实现, 只是为了实现功能。请各位大侠拍砖。 好了,不多说了。贴代码:
1. T_ARTICLE 表 DROP TABLE IF EXISTS `t_article`; CREATE TABLE `t_article` ( `ID` varchar(32) NOT NULL default '', `ARTICLE_TITLE` varchar(255) default NULL, `ARTICLE_TAG` varchar(255) default NULL, `ARTICLE_CONTENT` text, PRIMARY KEY (`ID`) ) ENGINE=InnoDB DEFAULT CHARSET=utf8; 上面的数据 随便加一下吧,主要是article_content和article_title字段的数据,article_content是个text字段,所以你可以粘贴网页的文字内容添加到本字段中。
2.业务接口 IArticleService.java
package com.jushi.lucene.business;

import java.util.List;

import com.jushi.lucene.entities.Article;

/**
 * Service contract for the article search demo.
 *
 * @Author jushi
 * @CreateDate Jun 4, 2010 1:24:42 PM
 * @Version V 1.0
 */
public interface IArticleService {

    /**
     * Searches for articles using the given query text.
     *
     * @param query the search text
     * @return the articles found for {@code query}
     */
    List<Article> getArticles(String query);

    /**
     * Creates the search index.
     *
     * @return a flag reporting the outcome of the operation
     */
    boolean createIndex();
}
3. 接口的实现 ArticleServiceImpl.java 主要的业务在这里实现:创建索引,搜索数据处理。
package com.jushi.lucene.business; import java.io.File; import java.io.StringReader; import java.util.ArrayList; import java.util.Date; import java.util.List; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.queryParser.MultiFieldQueryParser; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.Filter; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.highlight.Highlighter; import org.apache.lucene.search.highlight.QueryScorer; import org.apache.lucene.search.highlight.SimpleFragmenter; import org.apache.lucene.search.highlight.SimpleHTMLFormatter; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import com.jushi.lucene.dao.IArticleDAO; import com.jushi.lucene.entities.Article; /** * @Author jushi * @CreateDate Jun 4, 2010 1:25:00 PM * @Version V 1.0 */ public class ArticleServiceImpl implements IArticleService { private IArticleDAO articleDAO; private final String INDEXPATH = "g:\\index"; private Analyzer analyzer = new StandardAnalyzer(); public List<Article> getArticles(String query) { try{ List<Article> qlist = new ArrayList<Article>(); String fieldName = "title"; IndexSearcher indexSearcher = new IndexSearcher(INDEXPATH); //QueryParser parser = new QueryParser(fieldName, analyzer); //单 key 搜索 //Query queryOBJ = parser.parse(query); System.out.println(">>> 2.开始读取索引... ... 通过关键字:【 "+ query +" 】"); long begin = new Date().getTime(); //下面的是进行title,content 两个范围内进行收索. 
BooleanClause.Occur[] clauses = { BooleanClause.Occur.SHOULD,BooleanClause.Occur.SHOULD }; Query queryOBJ = MultiFieldQueryParser.parse(query, new String[]{"title","content"}, clauses, new StandardAnalyzer());//parser.parse(query); Filter filter = null; //################# 搜索相似度最高的记录 ################### TopDocs topDocs = indexSearcher.search(queryOBJ, filter, 1000); //TopDocs topDocs = indexSearcher.search(queryOBJ , 10000); System.out.println("*** 共匹配:" + topDocs.totalHits + "个 ***"); Article article = null; //输出结果 for (ScoreDoc scoreDoc : topDocs.scoreDocs){ Document targetDoc = indexSearcher.doc(scoreDoc.doc); article = new Article(); //设置高亮显示格式 SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter("<font color='red'><strong>", "</strong></font>"); /* 语法高亮显示设置 */ Highlighter highlighter = new Highlighter(simpleHTMLFormatter,new QueryScorer(queryOBJ)); highlighter.setTextFragmenter(new SimpleFragmenter(100)); // 设置高亮 设置 title,content 字段 String title = targetDoc.get("title"); String content = targetDoc.get("content"); TokenStream titleTokenStream = analyzer.tokenStream(fieldName,new StringReader(title)); TokenStream contentTokenStream = analyzer.tokenStream("content",new StringReader(content)); String highLightTitle = highlighter.getBestFragment(titleTokenStream, title); String highLightContent = highlighter.getBestFragment(contentTokenStream, content); if(highLightTitle == null) highLightTitle = title; if(highLightContent == null) highLightContent = content; article.setTitle(highLightTitle); article.setContent(highLightContent); article.setTag(targetDoc.get("tag")); article.setTotalHits(topDocs.totalHits); qlist.add(article); } long end = new Date().getTime(); System.out.println(">>> 3.搜索完毕... ... 
共花费:" + (end - begin) +"毫秒..."); indexSearcher.close(); return qlist; }catch(Exception e){ e.printStackTrace(); return null; } } public boolean createIndex() { //检查索引是否存在 if(this.isIndexExisted()) return this.isIndexExisted(); List<Article> list = articleDAO.getArticles(); try { Directory directory = FSDirectory.getDirectory(INDEXPATH); IndexWriter indexWriter = new IndexWriter(directory, analyzer ,true, IndexWriter.MaxFieldLength.LIMITED); long begin = new Date().getTime(); for(Article art: list) { Document doc = new Document(); String title = art.getTitle() == null ? "" : art.getTitle().trim(); String content = art.getContent() == null ? "" : art.getContent(); String tag = art.getTag() == null ? "" : art.getTag(); doc.add(new Field("title", title, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.YES)); doc.add(new Field("content", content, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.YES)); doc.add(new Field("tag", tag, Field.Store.COMPRESS,Field.Index.NO)); indexWriter.addDocument(doc); } long end = new Date().getTime(); System.out.println(">>> 1.存入索引完毕.. 共花费:" + (end - begin) +"毫秒..."); indexWriter.optimize(); indexWriter.close(); return true; }catch(Exception e){ e.printStackTrace(); return false; } } /** * check Index is Existed * @return true or false */ private boolean isIndexExisted() { try { File dir = new File(INDEXPATH); if(dir.listFiles().length>0) return true; else return false; }catch(Exception e){ e.printStackTrace(); return false; } } public void setArticleDAO(IArticleDAO articleDAO) { this.articleDAO = articleDAO; } } 4. DAO 接口及 DAO实现
/**
 * Data-access contract for reading articles.
 */
public interface IArticleDAO {

    /**
     * @return the articles provided by the underlying store
     */
    List<Article> getArticles();
}
/**
 * {@link IArticleDAO} implementation that reads articles through the
 * JdbcTemplate inherited from {@code JdbcDaoSupport}.
 */
public class ArticleDAOImpl extends JdbcDaoSupport implements IArticleDAO {

    /**
     * Selects title, tag and content from {@code t_article} and maps each row
     * with an {@code ArticleRowMapper}.
     *
     * @return the mapped rows
     */
    public List<Article> getArticles() {
        final String selectArticles = "SELECT article_title, article_tag, article_content FROM t_article";
        final ArticleRowMapper rowMapper = new ArticleRowMapper();
        List<?> rows = this.getJdbcTemplate().query(selectArticles, rowMapper);
        return (List<Article>) rows;
    }
}
5. 上面DAO需要的 ArticleRowMapper: 这里做了下小改动
package com.jushi.lucene.rowmapper; import java.sql.ResultSet; import java.sql.ResultSetMetaData; import java.sql.SQLException; import org.springframework.jdbc.core.RowMapper; import com.jushi.lucene.entities.Article; /** * @Author jushi * @CreateDate Jun 4, 2010 1:31:54 PM * @Version V 1.0 */ public class ArticleRowMapper implements RowMapper { public Object mapRow(ResultSet rs, int rowNum) throws SQLException { Article article = new Article(); ResultSetMetaData meta = rs.getMetaData(); int colNum = meta.getColumnCount(); for (int i = 1; i <= colNum; i++) { String colName = meta.getColumnName(i).toLowerCase(); if ("article_title".equals(colName)) { article.setTitle(rs.getString(i)); } else if ("article_content".equals(colName)) { article.setContent(rs.getString(i)); } else if ("article_tag".equals(colName)) { article.setTag(rs.getString(i)); //... } } return article; } }
6. Article 实体 添加了两个属性: TotalHits, TotalTime
package com.jushi.lucene.entities; /** * @Author jushi * @CreateDate Jun 4, 2010 1:18:48 PM * @Version V 1.0 */ public class Article { private String title; private String content; private String tag; private int TotalHits; //命中率 private int TotalTime; //花费时间 public int getTotalTime() { return TotalTime; } public void setTotalTime(int totalTime) { TotalTime = totalTime; } public String getTitle() { return title; } public void setTitle(String title) { this.title = title; } public String getContent() { return content; } public void setContent(String content) { this.content = content; } public String getTag() { return tag; } public void setTag(String tag) { this.tag = tag; } public int getTotalHits() { return TotalHits; } public void setTotalHits(int totalHits) { TotalHits = totalHits; } }
7. Spring 的配置如下: --- applicationContext-lucene.xml
<?xml version="1.0" encoding="UTF-8"?> <!DOCTYPE beans PUBLIC "-//SPRING//DTD BEAN 2.0//EN" "http://www.springframework.org/dtd/spring-beans-2.0.dtd"> <beans> <bean id="articleDAO" class="com.jushi.lucene.dao.ArticleDAOImpl" scope="singleton" > <property name="dataSource" ref="articleDataSource"></property> </bean> <bean id="articleService" class="com.jushi.lucene.business.ArticleServiceImpl" scope="singleton" > <property name="articleDAO" ref="articleDAO"></property> </bean> </beans> --- applicationContext-lucene-datasource.xml
<?xml version="1.0" encoding="UTF-8"?> <!DOCTYPE beans PUBLIC "-//SPRING//DTD BEAN 2.0//EN" "http://www.springframework.org/dtd/spring-beans-2.0.dtd"> <beans> <bean id="articleDataSource" class="org.apache.commons.dbcp.BasicDataSource" destroy-method="close"> <property name="driverClassName" value="com.mysql.jdbc.Driver" ></property> <property name="url" value="jdbc:mysql://localhost/companyweb?useUnicode=true&amp;characterEncoding=utf-8" /> <property name="username" value="root" /> <property name="password" value="jushi" /> <property name="initialSize" value="5" /> <property name="maxActive" value="20" /> <property name="maxIdle" value="20" /> <property name="minIdle" value="5" /> </bean> </beans>
8. action配置 struts-config.xml
<?xml version="1.0" encoding="UTF-8"?> <!DOCTYPE struts-config PUBLIC "-//Apache Software Foundation//DTD Struts Configuration 1.2//EN" "http://struts.apache.org/dtds/struts-config_1_2.dtd"> <struts-config> <form-beans> <form-bean name="lazyForm" type="org.apache.struts.validator.LazyValidatorForm"/> </form-beans> <action-mappings> <action path="/index" type="com.jushi.lucene.businessview.IndexAction" name="lazyForm" parameter="m" scope="request" validate="false"> <forward name="result" path="/result.jsp"></forward> </action> </action-mappings> </struts-config>
9. web.xml 文件配置: 监听、加载配置文件
<?xml version="1.0" encoding="UTF-8"?> <web-app version="2.5" xmlns="http://java.sun.com/xml/ns/javaee" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://java.sun.com/xml/ns/javaee http://java.sun.com/xml/ns/javaee/web-app_2_5.xsd"> <context-param> <param-name>contextConfigLocation</param-name> <param-value>classpath*:/springConfig/applicationContext*.xml</param-value> </context-param> <!-- Character Encoding filter --> <filter> <filter-name>encodingFilter</filter-name> <filter-class>org.springframework.web.filter.CharacterEncodingFilter</filter-class> <init-param> <param-name>encoding</param-name> <param-value>UTF-8</param-value> </init-param> </filter> <filter-mapping> <filter-name>encodingFilter</filter-name> <url-pattern>/*</url-pattern> </filter-mapping> <listener> <listener-class>org.springframework.web.context.ContextLoaderListener</listener-class> </listener> <servlet> <servlet-name>action</servlet-name> <servlet-class>org.apache.struts.action.ActionServlet</servlet-class> <init-param> <param-name>config</param-name> <param-value>/WEB-INF/struts-config.xml</param-value> </init-param> </servlet> <servlet-mapping> <servlet-name>action</servlet-name> <url-pattern>*.html</url-pattern> </servlet-mapping> <welcome-file-list> <welcome-file>index.jsp</welcome-file> </welcome-file-list> </web-app>
10. 前端 View 两个jsp index.jsp和 result.jsp
index.jsp
<%-- Search entry page: one form whose keyword is submitted to index.html?m=index. --%>
<%@ page language="java" import="java.util.*" pageEncoding="utf-8"%>
<%
// Absolute base URL (scheme://host:port/context/) used by the <base> tag below.
String path = request.getContextPath();
String basePath = request.getScheme()+"://"+request.getServerName()+":"+request.getServerPort()+path+"/";
%>
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
<head>
<base href="<%=basePath%>">
<title>简单的lucene测试</title>
<script type="text/javascript" src="js/CheckFunction.js"></script>
<script type="text/javascript">
// Validates the keyword field and submits the form.
// Returns false (without submitting) when the trimmed keyword is empty.
// NOTE(review): Trim() is not defined on this page; presumably it comes from js/CheckFunction.js - confirm.
function _search() {
var form = document.form1;
if(Trim(form.title.value) == '') {
alert("输入一个关键字吧!");
return false;
}
// "m=index" is appended so the request carries the m parameter.
form.action = 'index.html?m=index';
form.submit();
}
</script>
</head>
<body>
<center><h1>测试Lucene</h1>
<hr/>
<form name="form1" action="index.html" method="post">
请输入文章的标题关键字:<input type="text" maxlength="20" name="title" size="40"/>
<%-- The button calls _search() and then returns false from its own onclick handler. --%>
<button onclick="javascript:_search();return false;" style="border: thin;border-color: blue" name="search"> GO </button>
</form>
</center>
</body>
</html>
result.jsp
<%-- Result page: renders the "articlelist" and "totalTime" request attributes as a table. --%>
<%@ page language="java" import="java.util.*" pageEncoding="utf-8"%>
<%@ page import="com.jushi.lucene.entities.*"%>
<%
// Absolute base URL (scheme://host:port/context/) used by <base> and the back link.
String path = request.getContextPath();
String basePath = request.getScheme()+"://"+request.getServerName()+":"+request.getServerPort()+path+"/";
%>
<%
List<Article> list = (List<Article>) request.getAttribute("articlelist");
// The attribute is absent when indexing or searching failed; treat that as
// "no results" instead of failing with a NullPointerException.
if (list == null) {
    list = new ArrayList<Article>();
}
String TotalTime = "0.000";
int TotalHits = 0;
if (request.getAttribute("totalTime") != null) {
    TotalTime = request.getAttribute("totalTime").toString();
}
// Every article carries the same total hit count, so read it from the first one.
if (!list.isEmpty()) {
    TotalHits = list.get(0).getTotalHits();
}
%>
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
<head>
<base href="<%=basePath%>">
<title>搜索的结果如下:</title>
<style type="text/css">
.Fee { width: 650px; height:28px; float: right; padding:1px; overflow:hidden; border-bottom:1px solid #bde2ff; line-height:28px; font-family: '宋体'; font-size: 15px; font-weight: bold; }
.Fee span { color: red; font-size:14px; }
</style>
</head>
<body>
<div class="Fee">共搜到 <span><%=TotalHits %></span> 条结果. 共花费:<span><%=TotalTime %> </span>秒.</div><br/><br/>
<table width="80%" height="70%" border="1" cellpadding="0" cellspacing="0" align="center">
<tr height="20" bgcolor="#CCCCCC">
<td align="center" width="20%">标题</td>
<td align="center" width="60%">文章内容</td>
<td align="center" width="20%">所属标签</td>
</tr>
<%
if (!list.isEmpty()) {
    for (Article art : list) {
        // NOTE(review): these values are written without HTML escaping.
        // Title and content carry highlight markup on purpose, but the
        // underlying database text is emitted raw as well - review for XSS.
        String title = art.getTitle();
        String content = art.getContent();
        String tag = art.getTag();
%>
<tr>
<td><%=title %></td>
<td><%=content %></td>
<td><%=tag %></td>
</tr>
<%}}else{ %>
<tr>
<td colspan="3" align="center"><h3>对不起...没有搜到相关信息..请尝试其他关键字!</h3></td>
</tr>
<%} %>
</table>
<br/>
<center><a href="<%=basePath %>./">返回上一级</a></center>
</body>
</html>
11. action:IndexAction.java
package com.jushi.lucene.businessview; import java.text.DecimalFormat; import java.util.Date; import java.util.List; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; import org.apache.struts.action.ActionForm; import org.apache.struts.action.ActionForward; import org.apache.struts.action.ActionMapping; import org.apache.struts.actions.DispatchAction; import org.springframework.web.context.WebApplicationContext; import org.springframework.web.context.support.WebApplicationContextUtils; import com.jushi.lucene.business.IArticleService; import com.jushi.lucene.entities.Article; /** * @Author fenglong * @CreateDate Jun 4, 2010 1:50:02 PM * @Version V 1.0 * @CopyRight 2010 jushi */ public class IndexAction extends DispatchAction { public ActionForward index(ActionMapping mapping, ActionForm form, HttpServletRequest request, HttpServletResponse response) throws Exception { WebApplicationContext wac = WebApplicationContextUtils.getRequiredWebApplicationContext(this.getServlet().getServletContext()); IArticleService articleService = (IArticleService) wac.getBean("articleService"); String q = request.getParameter("title"); DecimalFormat df = new DecimalFormat("#0.000"); if(articleService.createIndex()){ long begin = new Date().getTime(); List<Article> list = articleService.getArticles(q); long end = new Date().getTime(); double time = (double) (end - begin) / 1000; String timeStr = df.format(time); request.setAttribute("totalTime", timeStr); request.setAttribute("articlelist", list); } return mapping.findForward("result"); } }
好了,所有的配置已经完成了。大概就是个Lucene的简单实现。只是稍微地和Spring结合了一下。功能就是搜索数据,高亮显示关键字。页面不好看,逻辑较简单。 欢迎拍砖。
声明:ITeye文章版权属于作者,受法律保护。没有作者书面许可不得转载。
推荐链接
|
|
返回顶楼 | |
发表时间:2010-06-29
![]() |
|
返回顶楼 | |
发表时间:2010-06-29
为了分布式。。。有时索引并不是存在本机上的,需要到其他的机器上去取
|
|
返回顶楼 | |
发表时间:2010-06-30
hellojinjie 写道 为了分布式。。。有时索引并不是存在本机上的,需要到其他的机器上去取
恩 对,这个就根据需要进行配置了。 |
|
返回顶楼 | |
发表时间:2010-06-30
skyuck 写道 ![]() 因为有的时候数据是以数据库形式存在的,而非单纯文件形式。故根据需要,本例只是个简单与db结合。 |
|
返回顶楼 | |
发表时间:2010-07-27
不错,支持下,下载下来跑下
|
|
返回顶楼 | |
发表时间:2010-07-27
lucene的应该还是很广泛的,特别是大数据量搜索的时候,能解决数据查询性能问题!
|
|
返回顶楼 | |
发表时间:2010-07-28
jushi1988 写道 hellojinjie 写道 为了分布式。。。有时索引并不是存在本机上的,需要到其他的机器上去取
恩 对,这个就根据需要进行配置了。 其实索引数据分离可以提高检索效率~~~~ |
|
返回顶楼 | |
发表时间:2010-07-28
harbey 写道 lucene的应该还是很广泛的,特别是大数据量搜索的时候,能解决数据查询性能问题!
恩. lucene在数据查询性能上有很大的优势。 |
|
返回顶楼 | |
浏览 9363 次