论坛首页 入门技术论坛

Getting Started with Lucene

浏览 1449 次
精华帖 (0) :: 良好帖 (0) :: 新手帖 (0) :: 隐藏帖 (0)
作者 正文
   发表时间:2010-02-22   最后修改:2010-02-22














下面是《Lucene in Action》中的一个简单示例：先为某个目录下的文本文件建立索引，然后通过关键字检索这些文件。


public class Indexer {
    public static void main(String[] args) throws Exception {
        if (args.length != 2) {
            throw new Exception("Usage: java " + Indexer.class.getName()
                    + " <index dir> <data dir>");
        String indexDir = args[0]; //1
        String dataDir = args[1]; //2
        long start = System.currentTimeMillis();
        Indexer indexer = new Indexer(indexDir);
        int numIndexed = indexer.index(dataDir);
        long end = System.currentTimeMillis();
        System.out.println("Indexing " + numIndexed + " files took "
                + (end - start) + " milliseconds");

    private IndexWriter writer;

    public Indexer(String indexDir) throws IOException {
        Directory dir = FSDirectory.open(new File(indexDir), null);
        writer = new IndexWriter(dir, //3
                new StandardAnalyzer(Version.LUCENE_CURRENT), true,

    public void close() throws IOException {
        writer.close(); //4

    public int index(String dataDir) throws Exception {
        File[] files = new File(dataDir).listFiles();
        for (int i = 0; i < files.length; i++) {
            File f = files[i];
            if (acceptFile(f)) {
        return writer.numDocs(); //5

    private boolean acceptFile(File f) {
        return !f.isDirectory() &&
                !f.isHidden() &&
                f.exists() &&
                f.canRead() &&

    private void indexFile(File f) throws Exception {
        System.out.println("Indexing " + f.getCanonicalPath());
        Document doc = getDocument(f);
        if (doc != null) {
            writer.addDocument(doc); //9

    private Document getDocument(File f) throws Exception {
        Document doc = new Document();
        doc.add(new Field("contents", new FileReader(f))); //7
        doc.add(new Field("filename", f.getCanonicalPath(), //8
                Field.Store.YES, Field.Index.NOT_ANALYZED));
        return doc;



public class Searcher {
    public static void main(String[] args) throws Exception {
        if (args.length != 2) {
            throw new Exception("Usage: java " + Searcher.class.getName()
                    + " <index dir> <query>");
        String indexDir = args[0]; //1
        String q = args[1]; //2
        search(indexDir, q);

    public static void search(String indexDir, String q)
            throws Exception {
        Directory dir = FSDirectory.open(new File(indexDir), null);
        IndexSearcher is = new IndexSearcher(dir); //3
        QueryParser parser = new QueryParser(Version.LUCENE_CURRENT, "contents", new StandardAnalyzer(Version.LUCENE_CURRENT)); //4
        Query query = parser.parse(q); //4
        long start = System.currentTimeMillis();
        TopDocs hits = is.search(query, 10); //5
        long end = System.currentTimeMillis();
        System.err.println("Found " + hits.totalHits + //6
                " document(s) (in " + (end - start) +
                " milliseconds) that matched query '" +
                q + "':");
        for (int i = 0; i < hits.scoreDocs.length; i++) {
            ScoreDoc scoreDoc = hits.scoreDocs[i];
            Document doc = is.doc(scoreDoc.doc); //7
            System.out.println(doc.get("filename")); //8
        is.close(); //9


  • 大小: 34.7 KB
论坛首页 入门技术版

Global site tag (gtag.js) - Google Analytics