Lucene 初探

zhcheng
浏览: 164993 次
性别:
来自: 北京
最近访客更多访客>>

etc123
JavaLike
157790077
sytcun
博主相关

博客
微博
相册
留言
关于我
文章分类

社区版块

存档分类

博客分类：
apache
/**
 * Minimal Lucene 3.3 walkthrough: creates, updates, prunes and queries a small on-disk index.
 *
 * Requires lucene-core-3.3.0 and lucene-highlighter-3.3.0 on the classpath.
 */
public class LuceneIndex {

	// Analyzer used both when indexing and when parsing/highlighting queries.
	private Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_33);
	// Directory on disk where the index files are stored.
	private final String indexPath = "/home/zhanghc/luence/index/";

	/**
	 * Adds the sample documents (id, name, content) to the index and optimizes it.
	 *
	 * The index is opened in CREATE_OR_APPEND mode, so calling this repeatedly appends
	 * the same sample documents again rather than replacing them.
	 *
	 * @return always {@code true} when indexing completes without an exception
	 * @throws IOException if the index cannot be opened or written
	 */
	public boolean createIndex() throws IOException {

		String [] array_content = {"在泽州县彤康食品有限公司", "屠宰加工项目建成投产", "比如肉制品均来自双汇", "雨润等大型肉类生产商"};
		String [] array_name = {"泽州", "屠宰", "肉制品", "肉类生产商"};

		Directory directory = FSDirectory.open(new File(indexPath));
		try {
			IndexWriter writer = openWriter(directory);
			try {
				for (int i = 0; i < array_name.length; i++) {
					writer.addDocument(buildDocument(i, array_name[i], array_content[i]));
				}
				writer.optimize();
			} finally {
				// Always release the writer (and its write lock), even when indexing fails.
				writer.close();
			}
		} finally {
			directory.close();
		}

		return true;
	}

	/**
	 * Deletes every document whose "id" term equals "0", optimizes the index and prints
	 * the number of documents that remain.
	 *
	 * Documents can alternatively be removed by passing a parsed Query to
	 * {@code writer.deleteDocuments(...)} instead of a Term.
	 *
	 * @throws IOException if the index cannot be opened or written
	 * @throws ParseException never thrown by the current body; kept for compatibility with callers
	 */
	public void deletes() throws IOException, ParseException{

		Directory directory = FSDirectory.open(new File(indexPath));
		try {
			IndexWriter writer = openWriter(directory);
			try {
				writer.deleteDocuments(new Term("id", "0"));
				writer.optimize();
				System.out.println(writer.numDocs());
			} finally {
				writer.close();
			}
		} finally {
			directory.close();
		}
	}

	/**
	 * Replaces the documents with ids 0 and 1 by new sample content, prints the resulting
	 * document count and optimizes the index.
	 *
	 * @throws IOException if the index cannot be opened or written
	 */
	public void updates() throws IOException{

		String [] array_content = {"索引的删除，纠结了很久，看到一篇总结不错的文章，转载过来好好学习", "即使在不关闭IndexReader的情况下"};
		String [] array_name = {"索引", "关闭"};

		Directory directory = FSDirectory.open(new File(indexPath));
		try {
			IndexWriter writer = openWriter(directory);
			try {
				for (int i = 0; i < array_name.length; i++) {
					Term term = new Term("id", String.valueOf(i));
					// updateDocument first deletes the document(s) containing the term and then
					// adds the new document; the delete-then-add is atomic as seen by a reader
					// on the same index.
					writer.updateDocument(term, buildDocument(i, array_name[i], array_content[i]));
				}
				System.out.println(writer.numDocs());
				writer.optimize();
			} finally {
				writer.close();
			}
		} finally {
			directory.close();
		}
	}

	/**
	 * Single-field search: parses {@code val} against the "name" field and prints the
	 * highlighted name of each of the top 10 hits.
	 *
	 * @param val query text in QueryParser syntax
	 * @throws CorruptIndexException if the index is corrupt
	 * @throws IOException if the index cannot be read
	 * @throws ParseException if {@code val} is not a valid query
	 */
	public void search(String val) throws CorruptIndexException, IOException, ParseException{

		final String field = "name";
		// Parse before opening the index so a malformed query cannot leak an open searcher.
		Query query = new QueryParser(Version.LUCENE_33, field, analyzer).parse(val);
		Highlighter highlighter = newHighlighter(query);

		Directory directory = FSDirectory.open(new File(indexPath));
		try {
			IndexSearcher search = new IndexSearcher(directory, true);
			try {
				TopDocs result = search.search(query, 10);// return at most the 10 best hits
				System.out.println("搜索结果，搜索条数为：" + result.totalHits);
				for (ScoreDoc item : result.scoreDocs) {
					Document document = search.doc(item.doc);
					System.out.println(highlight(highlighter, field, document.get(field)));
				}
			} finally {
				search.close();
			}
		} finally {
			directory.close();
		}
	}

	/**
	 * Multi-field search: matches {@code val} against "name" OR "content" and prints the
	 * highlighted name and content of each of the top 10 hits.
	 *
	 * @param val query text in QueryParser syntax
	 * @throws CorruptIndexException if the index is corrupt
	 * @throws IOException if the index cannot be read
	 * @throws ParseException if {@code val} is not a valid query
	 */
	public void mutilSearch(String val) throws CorruptIndexException, IOException, ParseException{

		// One Occur flag per field describes how the per-field clauses combine:
		//   MUST     - the clause has to match
		//   MUST_NOT - the clause must not match
		//   SHOULD   - the clause is optional
		BooleanClause.Occur [] clauses = {BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD};
		final String [] fields = {"name", "content"};
		Query query = MultiFieldQueryParser.parse(Version.LUCENE_33, val, fields, clauses, analyzer);

		printNameAndContentHits(query, newHighlighter(query));
	}

	/**
	 * Multi-field search built by hand with a BooleanQuery: a hit must match {@code val}
	 * in "content" and must NOT match it in "name". Prints the highlighted name and
	 * content of each of the top 10 hits.
	 *
	 * @param val query text in QueryParser syntax
	 * @throws CorruptIndexException if the index is corrupt
	 * @throws IOException if the index cannot be read
	 * @throws ParseException if {@code val} is not a valid query
	 */
	public void mutilSearch_2(String val) throws CorruptIndexException, IOException, ParseException{

		BooleanQuery booleanQuery = new BooleanQuery();

		Query excludedNameQuery = new QueryParser(Version.LUCENE_33, "name", analyzer).parse(val);
		booleanQuery.add(excludedNameQuery, Occur.MUST_NOT);

		Query requiredContentQuery = new QueryParser(Version.LUCENE_33, "content", analyzer).parse(val);
		booleanQuery.add(requiredContentQuery, Occur.MUST);

		// Highlight with the clause a hit is required to match, not the excluded one.
		printNameAndContentHits(booleanQuery, newHighlighter(requiredContentQuery));
	}

	/** Opens an IndexWriter on {@code directory} that creates the index if missing, else appends. */
	private IndexWriter openWriter(Directory directory) throws IOException {
		IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_33, analyzer);
		config.setOpenMode(OpenMode.CREATE_OR_APPEND);
		return new IndexWriter(directory, config);
	}

	/** Builds a stored document with an un-analyzed "id" and analyzed "name" and "content" fields. */
	private static Document buildDocument(int id, String name, String content) {
		Document document = new Document();
		document.add(new Field("id", String.valueOf(id), Field.Store.YES, Field.Index.NOT_ANALYZED, Field.TermVector.YES));
		document.add(new Field("name", name, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.YES));
		document.add(new Field("content", content, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.YES));
		return document;
	}

	/** Creates a highlighter that wraps the terms of {@code query} in font/strong tags. */
	private static Highlighter newHighlighter(Query query) {
		SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<font><strong>", "</strong></font>");
		Highlighter highlighter = new Highlighter(formatter, new QueryScorer(query));
		// Fragment size passed to SimpleFragmenter; presumably the approximate length of each snippet.
		highlighter.setTextFragmenter(new SimpleFragmenter(100));
		return highlighter;
	}

	/**
	 * Returns the best highlighted fragment of {@code text}, or {@code text} itself when
	 * nothing in it matches or the highlighter rejects the token offsets.
	 */
	private String highlight(Highlighter highlighter, String field, String text) throws IOException {
		TokenStream stream = analyzer.tokenStream(field, new StringReader(text));
		try {
			String fragment = highlighter.getBestFragment(stream, text);
			return fragment != null ? fragment : text;
		} catch (InvalidTokenOffsetsException e) {
			// Best effort: report the problem and show the unhighlighted text instead.
			e.printStackTrace();
			return text;
		}
	}

	/** Runs {@code query} and prints the highlighted "name" and "content" of the top 10 hits. */
	private void printNameAndContentHits(Query query, Highlighter highlighter) throws IOException {
		Directory directory = FSDirectory.open(new File(indexPath));
		try {
			IndexSearcher search = new IndexSearcher(directory, true);
			try {
				TopDocs result = search.search(query, 10);// return at most the 10 best hits
				System.out.println("搜索结果，搜索条数为：" + result.totalHits);
				for (ScoreDoc item : result.scoreDocs) {
					Document document = search.doc(item.doc);
					System.out.println(highlight(highlighter, "name", document.get("name")));
					System.out.println(highlight(highlighter, "content", document.get("content")));
					System.out.println("---------------");
				}
			} finally {
				search.close();
			}
		} finally {
			directory.close();
		}
	}

}
分享到：
OpenCms初始化一个CmsObject | Java中3DES加密与C#兼容
2011-07-20 23:05
浏览 2391
评论(0)
分类:开源软件
查看更多
发表评论

您还没有登录,请您登录后再发表评论
最近访客更多访客>>

博主相关

文章分类

社区版块

存档分类

最新评论

Lucene 初探

评论

发表评论

相关推荐

最近访客 更多访客>>

博主相关

文章分类

社区版块

存档分类

最新评论

Lucene 初探

评论

发表评论

相关推荐

Maven仓库汇总

Lucene排序以及自定义排序

HttpClient使用HttpPost进行文件上传

HttpClient使用HttpGet进行json数据传输

最近访客更多访客>>