lucene初体验

shaoxiongwang#21cn.com

浏览: 77454 次
性别:
来自: 广州

最近访客更多访客>>

wangshengha

jiejiecsdn

dongju

博主相关

博客

微博

相册

留言

关于我

文章分类

社区版块

存档分类

lucene Apache Windows Access

对照着lucene官方DEMO做了个例子,自己简化了一下:
我用的是lucene-2.2.0
首先产生索引文件:
IndexFiles

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.Date;

/** Index all text files under a directory. */
public class IndexFiles {

  private IndexFiles() {}

  /** Directory in which the generated index files are stored. */
  static final File INDEX_DIR = new File("D://luceneindex");

  /**
   * Builds a fresh index in {@link #INDEX_DIR} from every readable file found
   * under the hard-coded document directory, then prints the elapsed time.
   *
   * <p>Exits with status 1 if the document directory is missing or unreadable.
   * An {@link IOException} raised while indexing is reported on standard output
   * rather than propagated.
   *
   * @param args ignored
   */
  public static void main(String[] args) {
    // Directory holding the files to be indexed; put some text files in here.
    final File docDir = new File("D://lucenedata");
    if (!docDir.exists() || !docDir.canRead()) {
      System.out.println("Document directory '" +docDir.getAbsolutePath()+
    		  "' does not exist or is not readable, please check the path");
      System.exit(1);
    }

    Date start = new Date();
    try {
      // IndexWriter arguments: the directory that receives the index files,
      // the analyzer to use, and 'true' to create a new index.
      IndexWriter writer = new IndexWriter(INDEX_DIR, new StandardAnalyzer(), true);
      try {
        System.out.println("Indexing to directory '" +INDEX_DIR+ "'...");
        indexDocs(writer, docDir);
        System.out.println("Optimizing...");
        writer.optimize();
      } finally {
        // Close on every path so a failure while indexing does not leave the
        // writer (and whatever resources it holds) open.
        writer.close();
      }

      Date end = new Date();
      System.out.println(end.getTime() - start.getTime() + " total milliseconds");

    } catch (IOException e) {
      System.out.println(" caught a " + e.getClass() +
       "\n with message: " + e.getMessage());
    }
  }

  /**
   * Adds {@code file} to the index, recursing into it when it is a directory.
   * Files and directories that cannot be read are skipped.
   *
   * @param writer the index writer documents are added to
   * @param file a file to index, or a directory whose entries are indexed recursively
   * @throws IOException if adding a document fails for a reason other than the
   *     file not being found
   */
  static void indexDocs(IndexWriter writer, File file)
    throws IOException {
    // do not try to index files that cannot be read
    if (!file.canRead()) {
      return;
    }

    if (file.isDirectory()) {
      String[] files = file.list();
      // list() returns null when an IO error occurs
      if (files != null) {
        for (int i = 0; i < files.length; i++) {
          indexDocs(writer, new File(file, files[i]));
        }
      }
      return;
    }

    System.out.println("adding " + file);
    try {
      // Add the text file to the index.
      writer.addDocument(FileDocument.Document(file));
    } catch (FileNotFoundException ignored) {
      // Deliberately ignored: at least on windows, some temporary files raise
      // this exception with an "access denied" message, and checking whether
      // the file can be read beforehand doesn't help.
    }
  }

}

SearchFiles这个文件利用所产生的索引文件进行搜索:

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.FilterIndexReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Date;

/** Interactive console search over the index stored in {@code D://luceneindex}. */
public class SearchFiles {
	/** Number of hits printed before the user is asked whether to continue. */
	private static final int HITS_PER_PAGE = 10;

	/**
	 * Repeatedly reads a query from standard input, runs it against the
	 * {@code contents} field and prints the matching documents page by page.
	 * The loop ends on end of input or on a blank line.
	 *
	 * @param args ignored
	 * @throws Exception if the index cannot be opened, a query cannot be parsed,
	 *     or reading input / searching fails
	 */
	public static void main(String[] args) throws Exception {
		String index = "D://luceneindex";
		String field = "contents";
		IndexReader reader = IndexReader.open(index);
		try {
			Searcher searcher = new IndexSearcher(reader);
			Analyzer analyzer = new StandardAnalyzer();
			BufferedReader in = new BufferedReader(new InputStreamReader(System.in, "UTF-8"));
			QueryParser parser = new QueryParser(field, analyzer);
			System.out.println("");
			while (true) {
				System.out.println("Enter query: ");
				String line = in.readLine();
				// readLine() returns null at end of input.
				if (line == null)
					break;
				line = line.trim();
				if (line.length() == 0)
					break;
				Query query = parser.parse(line);
				System.out.println("Searching for: " + query.toString(field));
				Hits hits = searcher.search(query);
				System.out.println(hits.length() + " total matching documents");
				for (int start = 0; start < hits.length(); start += HITS_PER_PAGE) {
					int end = Math.min(hits.length(), start + HITS_PER_PAGE);
					for (int i = start; i < end; i++) {
						Document doc = hits.doc(i);
						String path = doc.get("path");
						if (path != null) {
							System.out.println((i + 1) + ". " + path);
							System.out.println("modify time :"
									+ doc.get("modified"));
							String title = doc.get("title");
							if (title != null) {
								System.out.println("   Title: " + title);
							}
						} else {
							System.out.println((i + 1) + ". "
									+ "No path for this document");
						}
					}
					if (hits.length() > end) {
						System.out.println("more (y/n) ? ");
						line = in.readLine();
						// Treat end of input like an empty answer instead of
						// dereferencing a null line.
						if (line == null || line.length() == 0 || line.charAt(0) == 'n')
							break;
					}
				}
			}
		} finally {
			// Close the reader even when parsing or searching throws.
			reader.close();
		}
	}
}

分享到：