开始学习lucene

makemyownlife

浏览: 538820 次
性别:
来自: 武汉

最近访客更多访客>>

morelily

让蛋蛋飞_007

qq_23565683

luojy200

博主相关

博客

微博

相册

留言

关于我

文章分类

社区版块

存档分类

博客分类：

lucene

apache lucene search java

《Lucene in Action》第一章的例子，初次体验，哈哈。


import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

import java.io.*;

/**
 * Lucene learning example: builds a file-system index from the files of a
 * single data directory.
 *
 * <p>Each accepted file becomes one document with two fields: {@code content}
 * (the file body, read as UTF-8) and {@code fileName} (stored, not analyzed).
 *
 * User: zhangyong
 * Date: 12-7-12
 */
public class Indexer {

    /** Writer for the index; opened in the constructor, released by {@link #close()}. */
    private final IndexWriter writer;

    /**
     * Opens an index writer on the given directory.
     *
     * <p>The writer is constructed with the {@code create} flag set to
     * {@code true}; per the Lucene 3.x IndexWriter documentation this
     * replaces any index already present at that location.
     *
     * @param indexDir path of the directory that holds the index
     * @throws IOException if the directory or the writer cannot be opened
     */
    public Indexer(String indexDir) throws IOException {
        Directory dir = FSDirectory.open(new File(indexDir));
        writer = new IndexWriter(dir, new StandardAnalyzer(Version.LUCENE_36), true, IndexWriter.MaxFieldLength.UNLIMITED);
    }

    /**
     * Closes the underlying IndexWriter.
     *
     * @throws IOException if closing the writer fails
     */
    public void close() throws IOException {
        writer.close();
    }

    /**
     * Indexes the direct children of {@code dataDir} that are regular,
     * visible, readable files and are accepted by {@code filter}.
     * Sub-directories are not descended into.
     *
     * @param dataDir directory whose files should be indexed
     * @param filter  decides which files are indexed; {@code null} accepts every file
     * @return the value of {@code writer.numDocs()} after indexing
     * @throws IOException if {@code dataDir} cannot be listed
     * @throws Exception   if reading or indexing a file fails
     */
    public int index(String dataDir, FileFilter filter) throws Exception {
        File[] files = new File(dataDir).listFiles();
        if (files == null) {
            // listFiles() returns null when the path is not a directory or cannot be read.
            throw new IOException("cannot list files in data directory: " + dataDir);
        }
        for (File f : files) {
            boolean indexable = !f.isDirectory() && !f.isHidden() && f.exists() && f.canRead();
            // The filter was previously ignored, so every readable file was indexed.
            if (indexable && (filter == null || filter.accept(f))) {
                indexFile(f);
            }
        }
        return writer.numDocs();
    }

    /** Accepts files whose name ends with {@code .txt}, ignoring case. */
    public static class TextFilesFilter implements FileFilter {
        @Override
        public boolean accept(File path) {
            return path.getName().toLowerCase().endsWith(".txt");
        }
    }

    /**
     * Builds the document for one file.
     *
     * @param f file to convert
     * @return a document with a {@code content} field backed by a UTF-8 reader
     *         over the file and a stored, non-analyzed {@code fileName} field
     * @throws Exception if the file cannot be opened
     */
    protected Document getDocument(File f) throws Exception {
        Document doc = new Document();
        // Decode explicitly as UTF-8 instead of relying on the platform default charset.
        doc.add(new Field("content", new InputStreamReader(new FileInputStream(f.getCanonicalPath()), "utf-8")));
        doc.add(new Field("fileName", f.getName(), Field.Store.YES, Field.Index.NOT_ANALYZED));
        return doc;
    }

    /**
     * Logs the file's canonical path and adds its document to the index.
     *
     * @param f file to index
     * @throws Exception if the document cannot be built or added
     */
    public void indexFile(File f) throws Exception {
        System.out.println("indexing " + f.getCanonicalPath());
        Document doc = getDocument(f);
        writer.addDocument(doc);
    }

    /**
     * Indexes the {@code .txt} files under a hard-coded data directory and
     * prints the elapsed time in milliseconds.
     *
     * @param args unused
     * @throws Exception if indexing fails
     */
    public static void main(String[] args) throws Exception {
        String dir = "E:\\lucene";
        String dataDir = "E:\\lucene\\data";
        long start = System.currentTimeMillis();
        Indexer indexer = new Indexer(dir);

        try {
            indexer.index(dataDir, new TextFilesFilter());
        } finally {
            // Always release the writer, even when indexing fails.
            indexer.close();
        }
        long end = System.currentTimeMillis();
        System.out.println("cost time==" + (end - start));
    }

}

package com.diyicai.share.search.test;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

import java.io.File;
import java.io.IOException;

/**
 * Example 1.2: runs a query against the {@code content} field of an existing
 * index and prints the stored {@code fileName} of each hit.
 *
 * User: zhangyong
 * Date: 12-7-14
 */
public class Searcher {

    /**
     * Searches a hard-coded index directory for a hard-coded query string.
     *
     * @param args unused
     * @throws IOException    if the index cannot be opened or read
     * @throws ParseException if the query string cannot be parsed
     */
    public static void main(String[] args) throws IOException, ParseException {
        String indexDir = "E:\\lucene";
        String q = "start";
        search(indexDir, q);
    }

    /**
     * Parses {@code q} against the {@code content} field, retrieves up to ten
     * hits, and prints the total hit count followed by each hit's
     * {@code fileName} value.
     *
     * @param indexDir path of the directory that holds the index
     * @param q        query string in QueryParser syntax
     * @throws IOException    if the index cannot be opened or read
     * @throws ParseException if {@code q} cannot be parsed
     */
    public static void search(String indexDir, String q) throws IOException, ParseException {
        Directory dir = FSDirectory.open(new File(indexDir));
        try {
            IndexSearcher is = new IndexSearcher(dir);
            try {
                QueryParser parser = new QueryParser(Version.LUCENE_36, "content", new StandardAnalyzer(Version.LUCENE_36));
                Query query = parser.parse(q);

                TopDocs hits = is.search(query, 10);
                System.out.println("find " + hits.totalHits);

                for (ScoreDoc scoreDoc : hits.scoreDocs) {
                    Document doc = is.doc(scoreDoc.doc);
                    System.out.println(doc.get("fileName"));
                }
            } finally {
                // Previously the searcher was never closed, leaking its index files.
                is.close();
            }
        } finally {
            dir.close();
        }
    }

}

分享到：