论坛首页 入门技术论坛

Lucene简单入门示例

浏览 3609 次
精华帖 (0) :: 良好帖 (0) :: 新手帖 (0) :: 隐藏帖 (0)
作者 正文
   发表时间:2009-12-22  

本文由Lucene自带的示例精简而来,只保留了主要代码,以备查看。
为文件夹中的文件生成索引:
package zhch.illq.lucene;

import java.io.File;
import java.io.FileReader;
import java.io.IOException;

import net.paoding.analysis.analyzer.PaodingAnalyzer;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/**
 * Minimal indexing demo: builds a Lucene index in {@link #INDEX_DIR} from the
 * regular files found directly inside a hard-coded document folder.
 */
public class LuceneIndex {

	/** Folder that receives the index files. */
	static final File INDEX_DIR = new File("d:\\temp\\index");

	/**
	 * Indexes every non-directory entry of the document folder into
	 * {@link #INDEX_DIR}. Sub-folders are skipped (no recursion).
	 *
	 * @param args ignored
	 */
	public static void main(String[] args) {
		File docDir = new File("d:\\temp\\neirong");

		// File.list() returns null when the path is not a readable directory.
		// Check this before opening the writer, because opening it with
		// create=true replaces any existing index.
		String[] files = docDir.list();
		if (files == null) {
			System.out.println("Document directory '" + docDir + "' does not exist or is not readable");
			return;
		}

		try {
			// Only one IndexWriter can hold the write lock of a directory at a time,
			// so exactly one writer is opened here. PaodingAnalyzer is a Chinese word
			// segmenter; for non-Chinese content pass
			// new StandardAnalyzer(Version.LUCENE_CURRENT) as the analyzer instead.
			IndexWriter writer = new IndexWriter(FSDirectory.open(INDEX_DIR), new PaodingAnalyzer(), true,
					IndexWriter.MaxFieldLength.LIMITED);
			try {
				for (String fileStr : files) {
					File file = new File(docDir, fileStr);
					if (!file.isDirectory()) {
						writer.addDocument(document(file));
					}
				}
				writer.optimize();
			} finally {
				// Always close so the write lock is released even if indexing failed.
				writer.close();
			}
		} catch (IOException e) {
			System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
		}
	}

	/**
	 * Builds the Lucene document for one file.
	 *
	 * @param f the file to describe
	 * @return a document with the fields {@code path}, {@code modified} and {@code contents}
	 * @throws java.io.FileNotFoundException if {@code f} cannot be opened for reading
	 */
	public static Document document(File f) throws java.io.FileNotFoundException {

		Document doc = new Document();

		// Path: stored and indexed (searchable) as a single un-tokenized term.
		doc.add(new Field("path", f.getPath(), Field.Store.YES, Field.Index.NOT_ANALYZED));

		// Last-modified time at minute resolution, stored and indexed un-tokenized.
		doc.add(new Field("modified", DateTools.timeToString(f.lastModified(), DateTools.Resolution.MINUTE),
				Field.Store.YES, Field.Index.NOT_ANALYZED));

		// Contents are supplied through a Reader: analyzed for searching but not stored.
		// FileReader decodes the file using the platform default charset.
		doc.add(new Field("contents", new FileReader(f)));
		return doc;
	}
}


对索引进行查询
package zhch.illq.lucene;

import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStreamReader;

import net.paoding.analysis.analyzer.PaodingAnalyzer;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/**
 * Minimal interactive search demo: reads queries from standard input and prints
 * the best matching documents of the index stored in a hard-coded folder.
 */
public class LuceneSearch {

	/**
	 * Simple command-line based search demo. Prompts for a query, runs it against
	 * the {@code contents} field and prints the hits; an empty line or end of
	 * input terminates the loop.
	 *
	 * @param args ignored
	 * @throws Exception if the index cannot be read or a query cannot be parsed
	 */
	public static void main(String[] args) throws Exception {

		String index = "d:\\temp\\index";
		String field = "contents";
		String queries = null;
		boolean raw = false;
		// Maximum number of hits to display.
		int hitsPerPage = 10;

		// searching, so read-only=true
		IndexReader reader = IndexReader.open(FSDirectory.open(new File(index)), true);
		try {
			Searcher searcher = new IndexSearcher(reader);
			// PaodingAnalyzer is a Chinese word segmenter; for non-Chinese content
			// use new StandardAnalyzer(Version.LUCENE_CURRENT) instead.
			Analyzer analyzer = new PaodingAnalyzer();

			BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
			QueryParser parser = new QueryParser(field, analyzer);
			while (true) {
				if (queries == null) { // prompt the user
					System.out.println("Enter query: ");
				}

				String line = in.readLine();
				if (line == null) {
					// End of input.
					break;
				}

				line = line.trim();
				if (line.length() == 0) {
					// A blank line ends the session.
					break;
				}

				Query query = parser.parse(line);
				System.out.println("Searching for: " + query.toString(field));

				doPagingSearch(in, searcher, query, hitsPerPage, raw, queries == null);
			}
		} finally {
			// Release the index even when parsing or searching throws.
			reader.close();
		}
	}

	/**
	 * Runs {@code query} and prints the total hit count followed by at most
	 * {@code hitsPerPage} hits, each with its stored {@code path} and
	 * {@code modified} fields.
	 *
	 * @param in          unused in this trimmed-down demo
	 * @param searcher    searcher to run the query against
	 * @param query       the query to execute
	 * @param hitsPerPage maximum number of hits to collect and print
	 * @param raw         unused in this trimmed-down demo
	 * @param interactive unused in this trimmed-down demo
	 * @throws IOException if the index cannot be read
	 */
	public static void doPagingSearch(BufferedReader in, Searcher searcher, Query query, int hitsPerPage, boolean raw,
			boolean interactive) throws IOException {

		TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, false);
		searcher.search(query, collector);
		ScoreDoc[] hits = collector.topDocs().scoreDocs;

		int numTotalHits = collector.getTotalHits();
		System.out.println(numTotalHits + " total matching documents");

		int start = 0;
		int end = Math.min(hits.length, start + hitsPerPage);

		for (int i = start; i < end; i++) {
			Document doc = searcher.doc(hits[i].doc);
			String path = doc.get("path");
			if (path != null) {
				System.out.println((i + 1) + ". " + path);
				System.out.println("   modified: " + doc.get("modified"));
			} else {
				System.out.println((i + 1) + ". " + "No path for this document");
			}
		}
	}

}


   发表时间:2010-01-06  
呵呵,好久没看这些了。果然是最简单的入门例子。
0 请登录后投票
   发表时间:2010-06-07  
好东西,我看看,呵呵
0 请登录后投票
   发表时间:2010-07-13  
还是用的庖丁解牛的那个插件
0 请登录后投票
论坛首页 入门技术版

跳转论坛:
Global site tag (gtag.js) - Google Analytics