浏览 3609 次
锁定老帖子 主题:Lucene简单入门示例
精华帖 (0) :: 良好帖 (0) :: 新手帖 (0) :: 隐藏帖 (0)
|
|
---|---|
作者 | 正文 |
发表时间:2009-12-22
Lucene自带示例精简,只留下了主要代码。以备查看 对文件夹生成索引
package zhch.illq.lucene;

import java.io.File;
import java.io.FileReader;
import java.io.IOException;

import net.paoding.analysis.analyzer.PaodingAnalyzer;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/**
 * Minimal indexing demo: builds a Lucene index in {@link #INDEX_DIR} from the
 * files found directly inside a fixed document folder.
 */
public class LuceneIndex {

    /** Folder that receives the generated index files. */
    static final File INDEX_DIR = new File("d:\\temp\\index");

    /**
     * Indexes every non-directory entry of the document folder into
     * {@link #INDEX_DIR}, re-creating the index from scratch on each run.
     *
     * <p>Any {@link IOException} is reported on standard output rather than
     * propagated.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        File docDir = new File("d:\\temp\\neirong");
        try {
            String[] files = docDir.list();
            if (files == null) {
                // File.list() returns null when the path is not a directory or cannot be
                // read; bail out before the writer (create == true) replaces the index.
                System.out.println("Document directory '" + docDir + "' does not exist or is not readable");
                return;
            }

            // Exactly one IndexWriter may be open on an index directory at a time. The
            // original code opened a StandardAnalyzer writer first and never closed it,
            // so opening this second writer failed on the directory's write lock.
            // The Paoding analyzer targets Chinese text; for other content replace it
            // with new StandardAnalyzer(Version.LUCENE_CURRENT).
            IndexWriter writer = new IndexWriter(FSDirectory.open(INDEX_DIR), new PaodingAnalyzer(), true,
                    IndexWriter.MaxFieldLength.LIMITED);
            try {
                for (String fileStr : files) {
                    File file = new File(docDir, fileStr);
                    if (!file.isDirectory()) {
                        writer.addDocument(document(file));
                    }
                }
                writer.optimize();
            } finally {
                // Always release the writer (and with it the write lock), even on failure.
                writer.close();
            }
        } catch (IOException e) {
            System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
        }
    }

    /**
     * Builds the Lucene document describing one file.
     *
     * <p>The document carries three fields: {@code path} and {@code modified}
     * (both stored and indexed without analysis) and {@code contents}, which is
     * fed from a {@link FileReader} over the file.
     *
     * @param f the file to describe
     * @return a new document for {@code f}
     * @throws java.io.FileNotFoundException if {@code f} cannot be opened for reading
     */
    public static Document document(File f) throws java.io.FileNotFoundException {
        Document doc = new Document();
        // Path: indexed (searchable) but not tokenized.
        doc.add(new Field("path", f.getPath(), Field.Store.YES, Field.Index.NOT_ANALYZED));
        // Last-modified time, truncated to minute resolution.
        doc.add(new Field("modified", DateTools.timeToString(f.lastModified(), DateTools.Resolution.MINUTE),
                Field.Store.YES, Field.Index.NOT_ANALYZED));
        // Contents: supplied through a Reader, so the text is analyzed but not stored.
        // NOTE(review): FileReader decodes with the platform default charset; files in
        // another encoding will be indexed incorrectly - confirm the input encoding.
        doc.add(new Field("contents", new FileReader(f)));
        return doc;
    }
}

对索引进行查询
package zhch.illq.lucene;

import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import 
java.io.InputStreamReader; import net.paoding.analysis.analyzer.PaodingAnalyzer; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.index.IndexReader; import org.apache.lucene.queryParser.QueryParser; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.Searcher; import org.apache.lucene.search.TopScoreDocCollector; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.Version; public class LuceneSearch { /** Simple command-line based search demo. */ public static void main(String[] args) throws Exception { String index = "d:\\temp\\index"; String field = "contents"; String queries = null; boolean raw = false; // 要显示条数 int hitsPerPage = 10; // searching, so read-only=true IndexReader reader = IndexReader.open(FSDirectory.open(new File(index)), true); // only Searcher searcher = new IndexSearcher(reader); Analyzer standardAnalyzer = new StandardAnalyzer(Version.LUCENE_CURRENT); // 如果是索引中文内容,可以使用Paoding中文分词器 Analyzer analyzer = new PaodingAnalyzer(); BufferedReader in = new BufferedReader(new InputStreamReader(System.in)); QueryParser parser = new QueryParser(field, analyzer); while (true) { if (queries == null) // prompt the user System.out.println("Enter query: "); String line = in.readLine(); if (line == null || line.length() == -1) break; line = line.trim(); if (line.length() == 0) break; Query query = parser.parse(line); System.out.println("Searching for: " + query.toString(field)); doPagingSearch(in, searcher, query, hitsPerPage, raw, queries == null); } reader.close(); } public static void doPagingSearch(BufferedReader in, Searcher searcher, Query query, int hitsPerPage, boolean raw, boolean interactive) throws IOException { TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, false); 
searcher.search(query, collector); ScoreDoc[] hits = collector.topDocs().scoreDocs; int end, numTotalHits = collector.getTotalHits(); System.out.println(numTotalHits + " total matching documents"); int start = 0; end = Math.min(hits.length, start + hitsPerPage); for (int i = start; i < end; i++) { Document doc = searcher.doc(hits[i].doc); String path = doc.get("path"); if (path != null) { System.out.println((i + 1) + ". " + path); System.out.println(" modified: " + doc.get("modified")); } else { System.out.println((i + 1) + ". " + "No path for this document"); } } } } 声明:ITeye文章版权属于作者,受法律保护。没有作者书面许可不得转载。
推荐链接
|
|
返回顶楼 | |
发表时间:2010-01-06
呵呵,好久没看这些了。果然是最简单的入门例子。
|
|
返回顶楼 | |
发表时间:2010-06-07
好东西,我看看,呵呵
|
|
返回顶楼 | |
发表时间:2010-07-13
还是用的庖丁解牛的那个插件
|
|
返回顶楼 | |