lucene3.5分词+搜索 -

feizhang666

浏览: 39700 次
性别:
来自: 无锡

最近访客更多访客>>

asonhe

Lz13579

ltianan

huaiao_chen

博主相关

博客

微博

相册

留言

关于我

文章分类

社区版块

存档分类

lucene3.5分词+搜索

博客分类：

lucene

lucene3.5 分词搜索

最近在做 Lucene，发现网上的 Lucene 示例都不太令人满意，所以自己写了一个。如果哪里有问题，欢迎指出。

建立索引

import java.io.File;
import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;


/**
 * Builds a small demo Lucene index containing four street addresses.
 *
 * <p>Each document has two fields: {@code id} (stored only, not searchable)
 * and {@code address} (stored and analyzed with {@link IKAnalyzer}).
 */
public class AddIndex {

	/**
	 * Creates (or opens) the index under {@code path} and adds the four sample
	 * address documents to it, then merges the index down to a single segment.
	 *
	 * <p>I/O failures are reported on standard output and are not rethrown, so
	 * the caller cannot tell from the return whether indexing succeeded.
	 *
	 * <p>NOTE(review): the writer config is left at its default open mode; if
	 * that default appends to an existing index (confirm against the
	 * IndexWriterConfig documentation), calling this method repeatedly on the
	 * same path will add duplicate documents.
	 *
	 * @param path file-system directory in which the index is stored
	 */
	public  void addIndex(String path) {
		try {
			Directory fsDir = FSDirectory.open(new File(path));
			// Remember: the analyzer used here must be the same one used when
			// querying, otherwise searches will not find the indexed data.
			Analyzer analyzer = new IKAnalyzer();
			IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_35, analyzer);
			IndexWriter writer = new IndexWriter(fsDir, conf);
			try {
				System.out.println("~~~建立索引~~~");
				writer.addDocument(newAddressDocument("1", "中国四川省成都市金牛区青羊东二路"));
				writer.addDocument(newAddressDocument("2", "中国四川省成都市金牛区永陵路"));
				writer.addDocument(newAddressDocument("3", "中国四川省成都市金牛区一环路西三段"));
				writer.addDocument(newAddressDocument("4", "中国四川省成都市金牛区营门口路"));

				writer.forceMerge(1);
			} finally {
				// Always close the writer, even when adding or merging fails,
				// so the writer (and the index lock it holds) is not leaked.
				writer.close();
			}
			System.out.println("~~~索引建立完成~~~");
		} catch (IOException e) {
			System.out.println(e.toString());
		}
	}

	/**
	 * Builds one address document.
	 *
	 * @param id      value of the {@code id} field; stored but not indexed
	 * @param address value of the {@code address} field; stored and analyzed
	 * @return a new document holding both fields
	 */
	private static Document newAddressDocument(String id, String address) {
		Document document = new Document();
		document.add(new Field("id", id, Field.Store.YES, Field.Index.NO));
		document.add(new Field("address", address, Field.Store.YES, Field.Index.ANALYZED));
		return document;
	}
}

查询数据

import java.io.File;
import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;

/**
 * Demo driver: builds the sample address index with {@link AddIndex} and then
 * runs one analyzed query against the {@code address} field, printing each
 * hit together with its score.
 */
public class TestLucene {
	/** Directory in which the demo index is stored. */
	private static final String INDEX_PATH = "e:\\lucene\\addressStore";
	/** Analyzer used to tokenize the query; must match the one used for indexing. */
	Analyzer analyzer = new IKAnalyzer();
	private static final File INDEX_DIR = new File(INDEX_PATH);
	/** Text searched for by {@link #main(String[])}. */
	private static final String QUERY_TEXT = "中国四川省成都市金牛区营门口路";
	/** Name of the document field that queries are parsed against. */
	private static final String FIELD_NAME = "address";

	/**
	 * Builds the index, then searches it for {@link #QUERY_TEXT}.
	 *
	 * @param args ignored
	 */
	public static void main(String[] args) {
		new AddIndex().addIndex(INDEX_PATH);
		try {
			new TestLucene().search(QUERY_TEXT);
		} catch (IOException e) {
			e.printStackTrace();
		} catch (ParseException e) {
			e.printStackTrace();
		}
	}

	/**
	 * Parses {@code keyword} with the query parser, prints the resulting
	 * query, and prints the address and score of up to ten matching documents.
	 *
	 * <p>{@code keyword} is interpreted as query-parser syntax, not as plain
	 * text.
	 *
	 * @param keyword query text to parse and search for
	 * @throws IOException    if the index cannot be opened or read
	 * @throws ParseException if {@code keyword} is not a valid query
	 */
	public void search(String keyword) throws IOException, ParseException {
		IndexReader reader = IndexReader.open(FSDirectory.open(INDEX_DIR));
		try {
			IndexSearcher isearcher = new IndexSearcher(reader);
			try {
				// Only the analyzer needs to be handed to the parser; the
				// query text is tokenized automatically during parsing.
				QueryParser parser = new QueryParser(Version.LUCENE_35, FIELD_NAME, analyzer);
				Query query = parser.parse(keyword);
				System.out.println(query.toString());

				// Run the search; 10 limits the result to the top 10 hits.
				TopDocs topDocs = isearcher.search(query, 10);
				for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
					System.out.println("----------------分割线----------------------");
					Document hitDoc = isearcher.doc(scoreDoc.doc);
					float score = scoreDoc.score;
					String address = hitDoc.get("address");
					System.out.println("address:" + address+"\nsocre:"+score);
					// To see how a score was computed, print
					// isearcher.explain(query, scoreDoc.doc) here.
				}
			} finally {
				isearcher.close();
			}
		} finally {
			// The searcher was given an already-open reader, so the reader
			// is closed explicitly here rather than relying on the searcher.
			reader.close();
		}
	}
}

查询结果

~~~建立索引~~~

~~~索引建立完成~~~

address:中国 address:国四 address:四川省 address:四川 address:四 address:省成 address:成都市 address:成都 address:都市 address:金牛区 address:金牛 address:营 address:门口 address:路

----------------分割线----------------------

address:中国四川省成都市金牛区营门口路

socre:0.9141956

----------------分割线----------------------

address:中国四川省成都市金牛区永陵路

socre:0.44761625

----------------分割线----------------------

address:中国四川省成都市金牛区青羊东二路

socre:0.39166427

----------------分割线----------------------

address:中国四川省成都市金牛区一环路西三段

socre:0.31202385

分享到：

hive分页 | js中正则表达判断经纬度

2012-01-10 15:22
浏览 2320
评论(0)
分类:行业应用
查看更多

发表评论

您还没有登录,请您登录后再发表评论

最近访客更多访客>>

博主相关

文章分类

社区版块

存档分类

最新评论

lucene3.5分词+搜索

评论

发表评论

相关推荐

最近访客 更多访客>>

博主相关

文章分类

社区版块

存档分类

最新评论

lucene3.5分词+搜索

评论

发表评论

相关推荐

最近访客更多访客>>