lucene的多种搜索

Callan

浏览: 736730 次
性别:
来自: 上海

最近访客更多访客>>

游其是你

boyu57013652

fanglei10

wm52308

博主相关

博客

微博

相册

留言

关于我

文章分类

社区版块

存档分类

博客分类：

Lucene

lucene

lucene的搜索相当强大,它提供了很多辅助查询类,各自完成一种特殊的查询,也可以相互组合使用,来完成一些复杂的操作.

public class Test{
	
	Analyzer analyzer = new StandardAnalyzer();
	
	RAMDirectory directory = new RAMDirectory();
	
	/** 
	 * 创建索引
	 * 
	 * @throws IOException
	 */
	public void index() throws IOException{
		
		IndexWriter indexWriter = new IndexWriter(directory,analyzer,true);
		
		Document doc1 = new Document();
		
		doc1.add(new Field("title","aaabbb",Store.YES,Index.TOKENIZED));
		
		doc1.add(new Field("content","If you would like to help promote OpenOffice",Store.YES,Index.TOKENIZED));
		
		doc1.add(new Field("time","2005",Store.YES,Index.TOKENIZED));
		
		indexWriter.addDocument(doc1);
		
		Document doc2 = new Document();
		
		doc2.add(new Field("title","bbcc",Store.YES,Index.TOKENIZED));
		
		doc2.add(new Field("content","sdfsdfsdfasdds",Store.YES,Index.TOKENIZED));
		
		doc2.add(new Field("time","2007",Store.YES,Index.TOKENIZED));
		
		indexWriter.addDocument(doc2);
		
		indexWriter.optimize();
		
		indexWriter.close();
	}
	
	// 按词条搜索
	public void termSearcher() throws IOException{
		
		IndexSearcher searcher = new IndexSearcher(directory);
		
		// 查询title中包含aaa
		Term term = new Term("title","aaa");
		
		Query query = new TermQuery(term);
		
		searcher.search(query);
	
		searcher.close();
	}

	// 短语搜索 
	public void phraseSearcher() throws IOException{
		
		IndexSearcher searcher = new IndexSearcher(directory);
		
		PhraseQuery phraseQuery = new PhraseQuery();
		
		// slop 两个项的位置之间允许的最大间隔,这里would和help中间只隔了like(to会被去掉),所以最大间隔设为1就能找到值
		// 对于两个相连的关键词来说,无论坡度设为多少,都可以找到
		// 对于不相连的词来说,当两个关键词相隔数小于坡度都可以找到,否则找不到
		phraseQuery.setSlop(1);
		
		phraseQuery.add(new Term("content","would"));
		
		phraseQuery.add(new Term("content","help"));
		
		// 如果将help放前面,would放后面,需要将would向后移动3个位置才能到help后面(to不算),所以要设为slop最少要设为3
		// phraseQuery.add(new Term("content","help"));
		
		// phraseQuery.add(new Term("content","would"));
		
		// phraseQuery.setSlop(3);
		
		// 短语的评分是根据项之间距离越小,评分越高,否则越小
		Hits hits =  searcher.search(phraseQuery);
		
		printResult(hits);
		
		searcher.close();
	}
	
	// 通配符搜索 WildcardQuery   
    // 通配符包括’?’匹配一个任意字符和’*’匹配零个或多个任意字符，例如你搜索’use*’，你可能找到’user’或者’uses’：   
	public void wildcardSearcher() throws IOException{
		
			IndexSearcher searcher = new IndexSearcher(directory);
			
			// 与正则一样,*代表0个或多个字母,?代表0个或一个字母
			// WildcardQuery与QueryParser不同的是:WildcardQuery的前缀可以为*,而QueryParser不行
			WildcardQuery query = new WildcardQuery(new Term("content","a?bbb*"));
			
			Hits hits = searcher.search(query);
			
			printResult(hits);
			
			searcher.close();
	}
	
	// 模糊搜索 FuzzyQuery
	public void fuzzySearcher() throws IOException{
			
			IndexSearcher search = new IndexSearcher(directory);
			
			// OpenOffica虽然没被索引,但能找到相近的OpenOffice
			FuzzyQuery query = new FuzzyQuery(new Term("content","OpenOffica"));
			
			Hits hits = search.search(query);
			
			printResult(hits);
			
			search.close();
	}
	
	// 使用前缀PrefixQuery
	public void prefixSearcher() throws IOException{
		
			IndexSearcher search = new IndexSearcher(directory);
			
			// 全部title前缀为a
			PrefixQuery query = new PrefixQuery(new Term("title","b"));
			
			Hits hits = search.search(query);
			
			printResult(hits);
			
			search.close();
			
	}

	// 范围搜索 RangeQuery
	public void rangeSearcher() throws IOException{
		
		IndexSearcher search = new IndexSearcher(directory);
		
		// RangeQuery query = new RangeQuery(beginTime, endTime, false); 
		// 开始时间,结束时间,最后一个参数表示是否包含边界条件本身,如果为false
		RangeQuery query = new RangeQuery(new Term("time","2005"),new Term("time","2007"),true);
		
		Hits hits = search.search(query);
		
		printResult(hits);
		
		search.close();
	}
	
	
	// 与或搜索BooleanQuery
	//BooleanClause用于表示布尔查询子句关系的类，包括：BooleanClause.Occur.MUST，BooleanClause.Occur.MUST_NOT，BooleanClause.Occur.SHOULD。有以下6种组合：

	//1．MUST和MUST：取得连个查询子句的交集。

	//2．MUST和MUST_NOT：表示查询结果中不能包含MUST_NOT所对应得查询子句的检索结果。

	//3．MUST_NOT和MUST_NOT：无意义，检索无结果。

	//4．SHOULD与MUST、SHOULD与MUST_NOT：SHOULD与MUST连用时，无意义，结果为MUST子句的检索结果。与MUST_NOT连用时，功能同MUST。

	//5．SHOULD与SHOULD：表示“或”关系，最终检索结果为所有检索子句的并集。

	public void booleanSearcher() throws IOException, ParseException{
		
		IndexSearcher search = new IndexSearcher(directory);
		
		QueryParser qp1 = new QueryParser("title",new StandardAnalyzer());
			
		Query query1 = qp1.parse("aa*");
			
		QueryParser qp2 = new QueryParser("title",new StandardAnalyzer());
			
		Query query2 = qp2.parse("bb*");
			
		BooleanQuery query = new BooleanQuery();
			
		// 搜索结果的title的前双缀可以是aa,或bb
		query.add(query1, BooleanClause.Occur.SHOULD);
			
		// BooleanClause.Occur.MUST 必须
		// BooleanClause.Occur.MUST_NOT 必须不是 
		query.add(query2, BooleanClause.Occur.SHOULD);
		
		Hits hits = search.search(query);
			
		printResult(hits);
		
		search.close();
		
	}
	

	// 多关键的搜索 PhrasePrefixQuery
	public void phrasePrefixSearcher() throws IOException{
		
		IndexSearcher search = new IndexSearcher(directory);
		
		PhrasePrefixQuery query = new PhrasePrefixQuery();
		
		// 这里两项都有可能首先被匹配
		query.add(new Term[]{new Term("content","would"),new Term("content","can")});
		
		// 只有一项必须匹配
		query.add(new Term("content","help"));
		
		// If you would like to help promote OpenOffice
		// can I help you
		// slop因子的作用域为查询中的所有短语
		query.setSlop(1);
		
		// 匹配第一项为 would 或 can 第二项为help
		// solp设置为1 
		// If you would like to help promote OpenOffice  除去if to 外,would与help的距离=1
		// can I help you 的距离也=1  所以可以搜索出两条数据
		
		Hits hits = search.search(query);
		
		printResult(hits);
		
		search.close();
	}
	
	// 在多个域上查询 MultiFieldQueryParser
	public void multiFieldSearcher() throws IOException, ParseException{
		
		IndexSearcher search = new IndexSearcher(directory);
		
		// 默认情况下的方式为Occur.SHOULD
		// titile可以匹配bb,content可以匹配you
//		MultiFieldQueryParser.parse(new String[]{"bb","you"},new String[]{"title","content"}, analyzer);
		
		// titile必须匹配bb,content不能匹配
		Query query = MultiFieldQueryParser.parse( new String[]{"bb","you"},new String[]{"title","content"},new BooleanClause.Occur[]{Occur.MUST,Occur.MUST_NOT}, analyzer);
		
		// title中必须包含bb  content不能有bb
		// Query query = MultiFieldQueryParser.parse( "bb*",new String[]{"title","content"},new BooleanClause.Occur[]{BooleanClause.Occur.MUST,BooleanClause.Occur.MUST_NOT}, analyzer);
		
		Hits hits = search.search(query);
		
		printResult(hits);
		
		search.close();
	}
	
	public void printResult(Hits hits) throws IOException{
		for(int i = 0; i < hits.length(); i++){
			Document d = hits.doc(i);
			System.out.println(d.get("title"));
			System.out.println(d.get("content"));
			System.out.println(d.get("time"));
		}
	}
	
	
}

5
顶

1
踩

分享到：

lucene的多种搜索2-SpanQuery | Oracle语句优化53个规则详解(2)

2008-01-07 23:35
浏览 8014
评论(2)
查看更多

2 楼 yangjianxiang2 2008-06-20

// titile必须匹配bb,content不能匹配
Query query = <SPAN class=hilite1>MultiFieldQueryParser</SPAN>.<SPAN class=hilite2>parse</SPAN>( new String[]{"bb","you"},new String[]{"title","content"},new BooleanClause.Occur[]{Occur.MUST,Occur.MUST_NOT}, analyzer);

对我帮助很大，谢谢了。

1 楼 waitingmyself 2008-02-28

public class A{
}

发表评论

您还没有登录,请您登录后再发表评论

最近访客更多访客>>

博主相关

文章分类

社区版块

存档分类

最新评论

lucene的多种搜索

评论

发表评论

相关推荐

最近访客 更多访客>>

博主相关

文章分类

社区版块

存档分类

最新评论

lucene的多种搜索

评论

发表评论

相关推荐

Lucene使用项向量提高高亮显示性能

Lucene使用项向量进行模糊查询

Lucene1.4与Lucene2.0的Field

lucene的多种搜索2-SpanQuery

Nutch的安装与运行

Lucene倒排索引原理

最近访客更多访客>>