lucene

thrillerzw

浏览: 143797 次
性别:
来自: 北京

最近访客更多访客>>

日出斯图加特

yangleleaa

smxly53

米糠杰

博主相关

博客

微博

相册

留言

关于我

文章分类

社区版块

存档分类

博客分类：

搜索

1、介绍

2、demo

/**
 * Minimal Lucene 4.6 walkthrough: build an index, query it, wipe it, update a
 * document and run a highlighted search.
 *
 * <p>Every public method is best-effort: failures are reported through
 * {@code printStackTrace()} and never propagated to the caller. Writers,
 * readers and directories are always released in a {@code finally} block so a
 * failed step does not leave the index write lock or file handles behind.
 */
public class Demo {
	/**
	 * Creates (or appends to) the index at {@code indexPath} with a single
	 * three-field sample document.
	 *
	 * @param indexPath file-system directory that holds the index
	 */
	public static void indexDemo(String indexPath) {
		Directory dir = null;
		IndexWriter iw = null;
		boolean committed = false;
		try {
			// Alternative: the standard analyzer splits Chinese text into single characters.
			//	Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_46);
			// Chinese word segmentation.
			Analyzer analyzer = new IKAnalyzer();
			// Location of the index to build.
			dir = FSDirectory.open(new File(indexPath));
			IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_46,
					analyzer);
			iw = new IndexWriter(dir, config);

			Document doc = new Document();
			// StringField is indexed as one un-tokenized term; TextField is run
			// through the analyzer.
			IndexableField idField = new StringField("id", "1", Field.Store.YES);
			IndexableField titleField = new StringField("title", "thriller北京的空气",
					Field.Store.YES);
			IndexableField contentField = new TextField("content", "thriller 加油，天天快乐",
					Field.Store.YES);
			doc.add(idField);
			doc.add(titleField);
			doc.add(contentField);

			iw.addDocument(doc);
			// Merge the index down to at most 3 segments.
			iw.forceMerge(3);
			iw.commit();
			committed = true;
		} catch (Exception e) {
			e.printStackTrace();
		} finally {
			closeWriter(iw, committed);
			closeResource(dir);
		}
	}

	/**
	 * Runs a term query for {@code id == "1"} and prints the hit count followed
	 * by {@code id:title:content} for each of the top 10 hits.
	 *
	 * @param indexPath file-system directory that holds the index
	 */
	public static void searcherDemo(String indexPath) {
		Directory dir = null;
		IndexReader reader = null;
		try {
			dir = FSDirectory.open(new File(indexPath));
			reader = DirectoryReader.open(dir);
			IndexSearcher is = new IndexSearcher(reader);
			// Simplest query: look up one term in one field.
			// Query query = new TermQuery(new Term("title","t1"));
			Query query = new TermQuery(new Term("id", "1"));
			// Numeric lookup, for an id indexed as IntField:
//			 Query query = NumericRangeQuery.newIntRange("id", 1, 4, true,
//			 true);
			TopDocs topDocs = is.search(query, 10);
			// Total number of matches (may exceed the 10 returned below).
			int hits = topDocs.totalHits;
			System.out.println("hits:" + hits);
			for (ScoreDoc sd : topDocs.scoreDocs) {
				// Load the stored fields of the hit by its internal document number.
				Document doc = is.doc(sd.doc);
				System.out.println(doc.get("id") + ":" + doc.get("title") + ":"
						+ doc.get("content"));
			}
		} catch (Exception e) {
			e.printStackTrace();
		} finally {
			closeResource(reader);
			closeResource(dir);
		}
	}

	/**
	 * Deletes every document from the index at {@code indexPath}.
	 *
	 * @param indexPath file-system directory that holds the index
	 */
	public static void deleteIndex(String indexPath) {
		Directory dir = null;
		IndexWriter iw = null;
		boolean committed = false;
		try {
			// Alternative: Chinese word segmentation.
			//Analyzer analyzer = new IKAnalyzer();
			// Standard analyzer (single-character tokens for Chinese).
			Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_46);
			dir = FSDirectory.open(new File(indexPath));
			IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_46,
					analyzer);
			iw = new IndexWriter(dir, config);
			iw.deleteAll();
			// Alternative: delete only the documents containing a given term.
			//iw.deleteDocuments(new Term("content", "快乐"));
			iw.commit();
			committed = true;
		} catch (Exception e) {
			e.printStackTrace();
		} finally {
			closeWriter(iw, committed);
			closeResource(dir);
		}
	}

	/**
	 * Replaces the document whose {@code id} is {@code "1"}.
	 *
	 * <p>A subsequent search prints {@code 1:thriller修改标题:null}: the
	 * replacement document carries no {@code content} field, so that value is
	 * gone after the update.
	 *
	 * @param indexPath file-system directory that holds the index
	 */
	public static void updateIndex(String indexPath) {
		Directory dir = null;
		IndexWriter iw = null;
		boolean committed = false;
		try {
			Analyzer analyzer = new IKAnalyzer();
			dir = FSDirectory.open(new File(indexPath));
			IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_46,
					analyzer);
			iw = new IndexWriter(dir, config);
			Document doc = new Document();
			// The id field must be added again: updateDocument deletes every
			// document matching the term and then adds this one as a brand-new
			// document. Without an id field the new document can no longer be
			// found by id, which looks like a plain delete.
			IndexableField idField = new StringField("id", "1", Field.Store.YES);
			IndexableField titlefield = new TextField("title", "thriller修改标题", Field.Store.YES);
			doc.add(idField);
			doc.add(titlefield);
			iw.updateDocument(new Term("id", "1"), doc);
			iw.commit();
			committed = true;
		} catch (Exception e) {
			e.printStackTrace();
		} finally {
			closeWriter(iw, committed);
			closeResource(dir);
		}
	}

	/**
	 * Searches {@code content} for the term {@code 快乐} and prints each hit as
	 * {@code id:title:content}, with matched terms wrapped in a red
	 * {@code <font>} tag.
	 *
	 * @param indexPath file-system directory that holds the index
	 */
	public static void highlightSearcherDemo(String indexPath){
		Directory dir = null;
		IndexReader reader = null;
		try {
			Analyzer analyzer = new IKAnalyzer();
			dir = FSDirectory.open(new File(indexPath));
			reader = DirectoryReader.open(dir);
			IndexSearcher is = new IndexSearcher(reader);
			Query query = new TermQuery(new Term("content","快乐"));

			String preTag = "<font color = \"red\" >";
			String postTag = "</font>";
			Formatter formatter = new SimpleHTMLFormatter(preTag,postTag);
			Scorer fragmentScorer = new QueryScorer(query);
			Highlighter highlighter = new Highlighter(formatter, fragmentScorer);
			// Fragments are cut with a SimpleFragmenter of size 30.
			Fragmenter fragmenter = new SimpleFragmenter(30);
			highlighter.setTextFragmenter(fragmenter);

			TopDocs topDocs = is.search(query, 10);
			int hits = topDocs.totalHits;
			System.out.println("hits:"+hits);
			for(ScoreDoc sd : topDocs.scoreDocs){
				Document doc = is.doc(sd.doc);
				String hid = highlightOrRaw(highlighter, analyzer, "id", doc.get("id"));
				String ht = highlightOrRaw(highlighter, analyzer, "title", doc.get("title"));
				String hc = highlightOrRaw(highlighter, analyzer, "content", doc.get("content"));
				System.out.println(hid+":"+ht+":"+hc);
			}
		} catch (Exception e) {
			e.printStackTrace();
		} finally {
			closeResource(reader);
			closeResource(dir);
		}
	}

	/**
	 * Returns the best highlighted fragment of {@code stored}, or {@code stored}
	 * itself when the highlighter yields no fragment. A {@code null} stored
	 * value (the field is absent from the document) is returned as-is, since
	 * there is no text to highlight.
	 */
	private static String highlightOrRaw(Highlighter highlighter, Analyzer analyzer,
			String field, String stored) throws Exception {
		if (stored == null) {
			return null;
		}
		String fragment = highlighter.getBestFragment(analyzer, field, stored);
		return fragment != null ? fragment : stored;
	}

	/**
	 * Releases {@code writer}: a plain close when the work was committed,
	 * otherwise a rollback so that half-applied changes are discarded. Either
	 * way the writer is shut down; failures are reported, not thrown.
	 */
	private static void closeWriter(IndexWriter writer, boolean committed) {
		if (writer == null) {
			return;
		}
		try {
			if (committed) {
				writer.close();
			} else {
				writer.rollback();
			}
		} catch (java.io.IOException e) {
			e.printStackTrace();
		}
	}

	/** Closes {@code resource} if it was opened; failures are reported, not thrown. */
	private static void closeResource(java.io.Closeable resource) {
		if (resource == null) {
			return;
		}
		try {
			resource.close();
		} catch (java.io.IOException e) {
			e.printStackTrace();
		}
	}
}

3、Lucene学习——IKAnalyzer中文分词（一）: http://blog.csdn.net/cyxlzzs/article/details/7999212

4、问题

ik分词：java.lang.IllegalStateException: TokenStream contract violation: reset()/close() call missing, reset() called multiple times, or subclass does not call super.reset(). Please see Javadocs of TokenStream class for more information about the correct consuming workflow.

解决：在循环 while (tokenStream.incrementToken()) 之前先调用 tokenStream.reset()；循环结束后依次调用 tokenStream.end() 和 tokenStream.close()。

分享到：

spring3+struts2+mybatis3+velocity1.7+sit ... | 路由器

2014-03-05 23:08
浏览 1174
评论(0)
分类:开源软件
查看更多

发表评论

您还没有登录,请您登录后再发表评论

最近访客更多访客>>

博主相关

文章分类

社区版块

存档分类

最新评论

lucene

评论

发表评论

相关推荐

最近访客 更多访客>>

博主相关

文章分类

社区版块

存档分类

最新评论

lucene

评论

发表评论

相关推荐

源码分析ik歧义处理

源码分析ik分词主流程

ik分词

solr

最近访客更多访客>>