`
bobotc
  • 浏览: 18940 次
  • 性别: Icon_minigender_1
  • 来自: 上海
社区版块
存档分类
最新评论

lucene+ajax构建站内搜索引擎

    博客分类:
  • java
阅读更多

有点样子了,当然还有很多要优化的地方,用lucene做一个站内搜索,我用的是lucene3.0.1.

首先看看数据库,我用的新闻模拟的:新闻表

-- News table (the search corpus): one row per published news item.
CREATE TABLE `t_newsitem` (
  `Id` int(11) NOT NULL auto_increment,
  `editor` varchar(255) default NULL,
  `newsContent` longtext,
  `newsTitle` varchar(255) default NULL,
  `publishTime` datetime default NULL,
  -- NOTE: `resoure` is a misspelled duplicate of `resource`; both are kept so
  -- existing ORM mappings keep working, but new code should use `resource`.
  `resoure` varchar(255) default NULL,
  `t_newsType_id` int(11) default NULL,
  `resource` varchar(255) default NULL,
  PRIMARY KEY  (`Id`),
  KEY `FK9CB4BF1923597B2` (`t_newsType_id`),
  -- the original dump declared the same foreign key three times under three
  -- different names (and the same index twice); one constraint is enough
  CONSTRAINT `FK9CB4BF1923597B2` FOREIGN KEY (`t_newsType_id`) REFERENCES `t_newstype` (`Id`)
) ENGINE=InnoDB DEFAULT CHARSET=gbk;
新闻类型表(也没啥用):

-- News category lookup table (referenced by t_newsitem.t_newsType_id;
-- not used by the search code itself).
CREATE TABLE `t_newstype` (
  `Id` int(11) NOT NULL auto_increment,
  `newsTypeName` varchar(255) default NULL,
  PRIMARY KEY  (`Id`)
) ENGINE=InnoDB DEFAULT CHARSET=gbk;
系统是SSH(Struts+Spring+Hibernate)架构的,站内搜索就是基于它实现的。

要使用lucene,至少要添加lucene-core-3.0.1.jar(核心),lucene-highlighter-3.0.1.jar(高亮显示),lucene-analyzers-3.0.1.jar(分词器)。因为它自带的分词器对中文支持不好,我使用了IKAnalyzer分词器,IKAnalyzer3.2.3Stable.jar。

做好上面的准备工作,接下来就是编写下面的两个类了:

package luence;

import java.io.File;
import java.io.IOException;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.List;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.store.SimpleFSDirectory;
import org.htmlparser.Parser;
import org.wltea.analyzer.lucene.IKAnalyzer;

import service.NewsService;
import entity.TNewsitem;

/**
 * 创建索引库
 *
 * @author tqc
 *
 */
public class IndexCreate {

 String path = "C:/index";// 索引所在文件夹
 String path2 = "C:/index2";// tag索引所在文件夹

 Analyzer analyzer=new IKAnalyzer();

 NewsService service = null;

 public NewsService getService() {
  return service;
 }

 public void setService(NewsService service) {
  this.service = service;
 }

 /**
  * 创建全文新闻索引
  */
 @SuppressWarnings("unchecked")
 public void createIndexForNews() throws Exception {
  IndexWriter indexWriter = new IndexWriter(new SimpleFSDirectory(new File(path)), analyzer, true,IndexWriter.MaxFieldLength.LIMITED);
  List<TNewsitem> list = service.getNews();
  DateFormat format = new SimpleDateFormat("yyyy年MM月dd日 HH时mm分ss秒");
  // 对所有的新闻实体进行索引创建
  for (TNewsitem newsItem : list) {
   Document doc = new Document();
   String newsTitle = newsItem.getNewsTitle();
   String newsContent = newsItem.getNewsContent();
   String publishDate = format.format(newsItem.getPublishTime());
   String id = newsItem.getId() + "";
   doc.add(new Field("title", newsTitle, Field.Store.YES,
     Field.Index.ANALYZED));
   Parser parser = new Parser();
   parser.setInputHTML(newsContent);
   String strings = parser.parse(null).elementAt(0)
     .toPlainTextString().trim();
   doc.add(new Field("content", strings, Field.Store.YES,
     Field.Index.ANALYZED));
   doc.add(new Field("date", publishDate, Field.Store.YES,
     Field.Index.NOT_ANALYZED));
   doc.add(new Field("id", id, Field.Store.YES, Field.Index.NO));
   indexWriter.addDocument(doc);
  }
  // 优化索引
  indexWriter.optimize();
  indexWriter.close();
 }
 /**
  * 为tag创建索引
  * @throws IOException
  * @throws LockObtainFailedException
  * @throws CorruptIndexException
  */
 @SuppressWarnings("unchecked")
 public void createIndexForTag() throws CorruptIndexException, LockObtainFailedException, IOException{
  IndexWriter indexWriter = new IndexWriter(new SimpleFSDirectory(new File(path2)), analyzer, true,IndexWriter.MaxFieldLength.LIMITED);
  List<TNewsitem> list = service.getNews();
  for (TNewsitem newsItem : list) {
   Document doc = new Document();
   String tags = newsItem.getNewsTitle();
   doc.add(new Field("tags", tags, Field.Store.YES,
     Field.Index.ANALYZED));
   indexWriter.addDocument(doc);
  }
  // 优化索引
  indexWriter.optimize();
  indexWriter.close();
 }
 上面这个类主要是用来创建索引。

package luence;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Formatter;
import org.apache.lucene.search.highlight.Fragmenter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.Scorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.SimpleFSDirectory;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;

import entity.SearchResultBean;

/**
 * 查询索引库
 *
 * @author tqc
 *
 */
public class IndexSearch {

 String path = "C:/index";// 索引所在文件夹
 String path2 = "C:/index2";// tag索引所在文件夹

 Analyzer analyzer = new IKAnalyzer();// 中文分词器

 /**
  * 条件查询 显示高亮效果
  *
  * @param searchParam
  * @return
  * @throws Exception
  */
 public List<SearchResultBean> getSearchResult(String searchParam)
   throws Exception {
  IndexReader reader = IndexReader.open(new SimpleFSDirectory(new File(
    path)));
  String[] fileds = { "title", "content" };// 在哪些字段中查询
  QueryParser parser = new MultiFieldQueryParser(Version.LUCENE_30,
    fileds, analyzer);
  IndexSearcher searcher = new IndexSearcher(reader);
  Query query = parser.parse(searchParam);
  TopDocs docs = searcher.search(query, 10000);
  System.out.println("--->匹配总个数:" + docs.totalHits);
  List<SearchResultBean> list = new ArrayList<SearchResultBean>();

  Formatter formatter = new SimpleHTMLFormatter("<b><font color='red'>",
    "</font></b>");
  Scorer scorer = new QueryScorer(query);
  Highlighter highlighter = new Highlighter(formatter, scorer);
  Fragmenter fragmenter = new SimpleFragmenter(50);
  highlighter.setTextFragmenter(fragmenter);

  for (ScoreDoc scoredoc : docs.scoreDocs) {
   int docSn = scoredoc.doc;
   Document doc = searcher.doc(docSn);
   SearchResultBean srb = new SearchResultBean();
  
   String id = doc.get("id");
   String date = doc.get("date");
  
   String c = highlighter.getBestFragment(analyzer, "content", doc.get("content"));
   if (c == null) {
    String content = doc.get("content");
    int endIndex = Math.min(100, content.length());
    c=content.substring(0,endIndex);
   }
   doc.getField("content").setValue(c);
  
   String t = highlighter.getBestFragment(analyzer, "title", doc.get("title"));
   if (t == null) {
    String title = doc.get("title");
    int endIndex = Math.min(20, title.length());
    t=title.substring(0,endIndex);
   }
   doc.getField("title").setValue(t);
  
   String content = doc.get("content");
   String title = doc.get("title");
   srb.setContent(content);
   srb.setTitle(title);
   srb.setDate(date);
   srb.setId(id);
   list.add(srb);
  }
  return list;
 }

 /**
  * 关键词查询tag索引
  *
  * @throws IOException
  * @throws CorruptIndexException
  * @throws ParseException
  */
 @SuppressWarnings("unchecked")
 public List TermQuery(String key) throws CorruptIndexException, IOException, ParseException {
  IndexReader reader = IndexReader.open(new SimpleFSDirectory(new File(
    path2)));
  IndexSearcher searcher = new IndexSearcher(reader);
  QueryParser parser = new QueryParser(Version.LUCENE_30,"tags",analyzer);
  Query query = parser.parse(key);
  System.out.println(key);
  TopDocs docs = searcher.search(query, 10000);
  System.out.println("--->匹配总个数:" + docs.totalHits);
  List<SearchResultBean> list = new ArrayList();
  for (ScoreDoc scoredoc : docs.scoreDocs) {
   int docSn = scoredoc.doc;
   Document doc = searcher.doc(docSn);
   SearchResultBean srb = new SearchResultBean();
   String tag = doc.get("tags");
   srb.setTitle(tag);
   list.add(srb);
  }
  return list;
 }

}

}
这个类用于查询索引库。

接下来在action中使用了,

/**
  * Rebuilds both Lucene indexes, then forwards back to the index page.
  *
  * @return the "index" result name
  * @throws Exception if either index rebuild fails
  */
 public String c() throws Exception {
  indexCreate.createIndexForNews();// full-text news index
  indexCreate.createIndexForTag();// tag index backing autocomplete
  System.out.println("-->索引创建成功!");// "index created successfully"
  return "index";
 }

/**
  * 搜
  *
  * @return
  * @throws Exception
  */
 @SuppressWarnings("unchecked")
 public String s() throws Exception {
  String searchParam = ServletActionContext.getRequest().getParameter(
    "key");
  if (searchParam == "") {
   return "index";
  }
  List list = indexSearch.getSearchResult(searchParam);
  ServletActionContext.getRequest().setAttribute("res", list);
  return "index";
 }
因为客户要求像百度那样,在文本框输入后能自动补全提示,所以下一步就到客户端看看。

<script type="text/javascript" src="js/jquery.js"></script>
  <script type='text/javascript' src='js/jquery.autocomplete.js'></script>
  <link type="text/css" rel="stylesheet" href="css/jquery.autocomplete.css" />

我使用的是jquery的autocomplete插件。

<script type="text/javascript">
   $(function() {
    // wire the jQuery autocomplete plugin to the search box;
    // "s!s" is the Struts2 action!method URL serving the suggestions
    $("#product").autocomplete("s!s", {
     minChars: 1, //minimum characters typed before suggesting
     width: 360, //width of the suggestion dropdown
     autoFill: false, //do not auto-fill the input field
     multiple: false, //only a single value may be completed
     dataType: "json", //expected response type
     parse: function(data) { //convert the server rows into plugin entries
      return $.map(data, function(row) {
       return {
        data: row,
        value: row.value,
        result: row.value //text placed into the input on selection
       }
      });
     },
     formatItem: function(row, i, max) { //how each dropdown row is rendered
      return row.value+"&nbsp;&nbsp;&nbsp;&nbsp;第"+i+"条记录,共"+max+"条";
     },
     formatMatch: function(row, i, max) { //text used for matching
      return row.name + " " + row.value;
     },
     formatResult: function(row) { //final value written to the input
      return row.value;
     }
    });
   });
  </script>

因为是ajax的,给看看后台是如何处理的,

/**
  * 完成自动补全
  *
  * @return
  * @throws Exception
  */
 @SuppressWarnings("unchecked")
 public String s() throws Exception {
  ServletActionContext.getResponse().setContentType(
    "text/json;charset=UTF-8");
  HttpServletRequest request = ServletActionContext.getRequest();
  String key = request.getParameter("q");
  key = new String(key.getBytes("ISO-8859-1"), "utf-8");
  List<SearchResultBean> list = indexSearch.TermQuery(key);
  StringBuffer sg = new StringBuffer();
  sg.append("[");
  for (int i = 0; i < list.size(); i++) {
   SearchResultBean s = list.get(i);
   if (i == list.size() - 1) {
    sg.append("{name:'eee',value:'" + s.getTitle() + "'}");
   } else {
    sg.append("{name:'eee',value:'" + s.getTitle() + "'},");
   }
  }
  sg.append("]");
  ServletActionContext.getResponse().getWriter().print(sg.toString());
  return null;
 }
返回的json格式。到目前基本上就搞定了!

0
4
分享到:
评论
1 楼 walong2012 2012-06-17  
为什么不直接给我们一个工程出来,我们可以直接运行

相关推荐

Global site tag (gtag.js) - Google Analytics