lucene3+IK分词器改造 lucene2.x+paoding

longzhun

浏览: 374509 次
性别:
来自: 北京

最近访客更多访客>>

popchild

lp164042318

promiseloney

必逍遥

博主相关

博客

微博

相册

留言

关于我

文章分类

社区版块

存档分类

博客分类：

Lucene
Jquery

/**
 * Adds a single article to the on-disk Lucene index located at the classpath
 * resource {@code /date/index/article/}.
 *
 * <p>The article id is stored verbatim (not analyzed); the title and the
 * HTML-stripped content are stored and analyzed with {@link IKAnalyzer}.
 * The writer is opened with {@code create == false}, i.e. the document is
 * appended to an index that must already exist.
 *
 * @param article the article to index; its id, title and content are read
 * @throws IllegalStateException if the index directory is not on the classpath
 * @throws Exception if the index location cannot be resolved or Lucene fails
 *                   to open, write or optimize the index
 */
public void createIndex(Article article) throws Exception {
		// Resolve the index directory via URI instead of trimming the
		// "file:/" prefix by hand: the manual substring only worked for
		// Windows-style URLs and produced a relative path elsewhere.
		java.net.URL indexLocation = AnalyzerAction.class
				.getResource("/date/index/article/");
		if (indexLocation == null) {
			throw new IllegalStateException(
					"Index directory /date/index/article/ not found on classpath");
		}
		FSDirectory directory = FSDirectory.open(new File(indexLocation.toURI()));
		try {
			// Chinese-capable analyzer used for the analyzed fields.
			Analyzer analyzer = new IKAnalyzer();

			// create == false: append to the existing index rather than
			// overwriting it (true would wipe the existing index).
			IndexWriter writer = new IndexWriter(directory, analyzer, false,
					IndexWriter.MaxFieldLength.UNLIMITED);
			try {
				Document doc = new Document();
				doc.add(new Field("id", String.valueOf(article.getId()),
						Field.Store.YES, Field.Index.NOT_ANALYZED));
				doc.add(new Field("article_title", article.getArticleTitle(),
						Field.Store.YES, Field.Index.ANALYZED));
				// Index plain text only; markup is stripped first.
				String content = FunctionUtil.Html2Text(article.getArticleContent());
				doc.add(new Field("article_content", content, Field.Store.YES,
						Field.Index.ANALYZED));
				writer.addDocument(doc);
				writer.optimize();
			} finally {
				// Always release the write lock, even when indexing fails.
				writer.close();
			}
		} finally {
			directory.close();
		}
	}

	/**
	 * Searches the index at {@code path} for {@code keywords} in the article
	 * title and content fields and appends one {@code Article} per hit to
	 * {@code searcheResult}.
	 *
	 * <p>Title and content of each hit are highlighted with
	 * {@code <font color=red>} tags. When a field contains no matching term
	 * the stored text is used instead (content is cut to the fragment size),
	 * so a hit never ends up with a null title or content. The number of
	 * returned hits is written to {@code maxResultSize}.
	 *
	 * @param path     file-system path of the Lucene index directory
	 * @param keywords user query, parsed against both fields
	 * @throws Exception if the index cannot be opened, the query cannot be
	 *                   parsed, or highlighting fails
	 */
	public void searchIndex(String path, String keywords) throws Exception {
		// Upper bound on the number of hits fetched from the index.
		final int maxHits = 20000;
		// Number of characters kept around a highlighted keyword.
		final int fragmentSize = 120;

		// Fields to query; a match in either field is sufficient (SHOULD).
		String[] field = { "article_title", "article_content" };
		BooleanClause.Occur[] flags = new BooleanClause.Occur[] {
				BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD };
		// Analyzer used both for query parsing and for highlighting.
		Analyzer analyzer = new IKAnalyzer();

		FSDirectory directory = FSDirectory.open(new File(path));
		try {
			IndexReader reader = IndexReader.open(directory);
			try {
				// Share the reader with the searcher instead of opening a
				// second one on the same directory.
				Searcher searcher = new IndexSearcher(reader);
				try {
					Query query = MultiFieldQueryParser.parse(Version.LUCENE_30,
							keywords, field, flags, analyzer);
					// rewrite() returns the rewritten query; it must be kept
					// so that the QueryScorer sees primitive queries.
					query = query.rewrite(reader);

					TopDocs hits = searcher.search(query, maxHits);
					this.maxResultSize = String.valueOf(hits.scoreDocs.length);

					// The highlighter depends only on the query, so build it
					// once: HTML formatter + scorer + fixed-size fragmenter.
					SimpleHTMLFormatter formatter = new SimpleHTMLFormatter(
							"<font color=red>", "</font>");
					Highlighter highlighter = new Highlighter(formatter,
							new QueryScorer(query));
					highlighter.setTextFragmenter(new SimpleFragmenter(fragmentSize));

					for (ScoreDoc sdoc : hits.scoreDocs) {
						Document doc = searcher.doc(sdoc.doc);
						String title = doc.get("article_title");
						String content = doc.get("article_content");

						Article article = new Article();
						article.setId(Integer.valueOf(doc.get("id")));

						String contentSnippet = highlightBestFragment(highlighter,
								analyzer, "article_content", content);
						if (contentSnippet == null && content != null) {
							// No keyword in the content: show its beginning.
							contentSnippet = content.length() > fragmentSize
									? content.substring(0, fragmentSize)
									: content;
						}
						article.setArticleContent(contentSnippet);

						String titleSnippet = highlightBestFragment(highlighter,
								analyzer, "article_title", title);
						if (titleSnippet == null) {
							// No keyword in the title: keep the plain title.
							titleSnippet = title;
						}
						article.setArticleTitle(titleSnippet);

						searcheResult.add(article);
					}
				} finally {
					searcher.close();
				}
			} finally {
				reader.close();
			}
		} finally {
			directory.close();
		}
	}

	/**
	 * Returns the best highlighted fragment of {@code text} for the given
	 * field, or {@code null} when {@code text} is null or the highlighter
	 * finds no matching term in it.
	 */
	private String highlightBestFragment(Highlighter highlighter,
			Analyzer analyzer, String fieldName, String text) throws Exception {
		if (text == null) {
			return null;
		}
		TokenStream tokenStream = analyzer.tokenStream(fieldName,
				new StringReader(text));
		return highlighter.getBestFragment(tokenStream, text);
	}

改动的地方不是很多，注意看一下

分享到：

分页检索及完善站内搜索 | 即时更新索引思路

2012-02-27 21:12
浏览 1434
评论(1)
分类:编程语言
查看更多

1 楼 dandongsoft 2013-03-08

发表评论

您还没有登录,请您登录后再发表评论

最近访客更多访客>>

博主相关

文章分类

社区版块

存档分类

最新评论

lucene3+IK分词器改造 lucene2.x+paoding

评论

发表评论

相关推荐

最近访客 更多访客>>

博主相关

文章分类

社区版块

存档分类

最新评论

lucene3+IK分词器 改造 lucene2.x+paoding

评论

发表评论

相关推荐

分页检索及完善站内搜索

即时更新索引思路

更新索引策略之一（非繁忙时刻更新）

lucene与数据表比较

lucene+paoding实现全文检索

Lucene2.4 索引库位置介绍

Lucene2.4第一个简单实例

跟我学JQuery---------------第四课

跟我学JQuery---------------第三课

跟我学JQuery---------------第二课

跟我学JQuery---------------第一课

跟我学JQuery---------------前言

最近访客更多访客>>

lucene3+IK分词器改造 lucene2.x+paoding