lucene入门代码一

onlyOneToOne

浏览: 205566 次
性别:
来自: 黑龙江

最近访客更多访客>>

sunearlier

wenjiefeng

huangxinyu_it

chm00d339

博主相关

博客

微博

相册

留言

关于我

文章分类

社区版块

存档分类

博客分类：

Apache lucene

lucene apache java analyzer

本代码涉及到的关键类有（其中 Term、TermQuery 在本文代码中并未直接使用，查询由 QueryParser 解析生成）：
IndexWriter
Directory
Analyzer
Document
Field
IndexSearcher
Term
Query
TermQuery
TopDocs

代码示例：

package com.yale.lucene;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileFilter;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.InputStreamReader;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/**
 * Builds a Lucene index from the text files found directly inside a data
 * directory.
 * <p>
 * Every accepted file becomes one document with three stored fields:
 * "contents" (analyzed file text), "filename" and "fullpath" (both indexed
 * without analysis).
 */
public class Indexer
{
	/**
	 * Indexes the ".txt" files of a hard-coded data directory into a
	 * hard-coded index directory and prints how long it took.
	 *
	 * @param args ignored
	 * @throws Exception if the index cannot be created or a file cannot be read
	 */
	public static void main(String[] args) throws Exception
	{
		// Directory in which the index is created
		String indexDir = "F://新建文件夹//luceneTest//indexFile";
		// Directory that holds the files to be indexed
		String dataDir = "F://新建文件夹//luceneTest//dataSource";
		long start = System.currentTimeMillis();
		Indexer indexer = new Indexer(indexDir);
		int numIndexed;
		try
		{
			numIndexed = indexer.index(dataDir, new TextFilesFilter());
		}
		finally
		{
			// Always release the writer, even when indexing fails.
			indexer.close();
		}
		long end = System.currentTimeMillis();
		System.out.println("Indexing " + numIndexed + " files took "
				+ (end - start) + " milliseconds");

	}

	/** Writer through which all documents are added to the index. */
	private final IndexWriter writer;

	/**
	 * Opens the index directory and creates the IndexWriter on top of it.
	 *
	 * @param indexDir path of the directory that will hold the index
	 * @throws Exception if the directory or the writer cannot be opened
	 */
	public Indexer(String indexDir) throws Exception
	{
		Directory dir = FSDirectory.open(new File(indexDir));
		// The third argument is the "create" flag; see the Lucene 3.0
		// IndexWriter documentation for its effect on an existing index.
		writer = new IndexWriter(dir, new StandardAnalyzer(Version.LUCENE_30),
				true, IndexWriter.MaxFieldLength.LIMITED);

	}

	/**
	 * Closes the IndexWriter.
	 *
	 * @throws Exception if closing the writer fails
	 */
	public void close() throws Exception
	{
		writer.close();
	}

	/**
	 * Indexes every regular, visible, readable file directly inside
	 * {@code dir} that the filter accepts. Sub-directories are skipped, not
	 * descended into.
	 *
	 * @param dir    path of the directory whose files are indexed
	 * @param filter decides which files are indexed; {@code null} accepts all
	 * @return the document count reported by the writer after indexing
	 * @throws FileNotFoundException if {@code dir} cannot be listed (it does
	 *                               not exist, is not a directory, or an I/O
	 *                               error occurred)
	 * @throws Exception             if reading or indexing a file fails
	 */
	public int index(String dir, FileFilter filter) throws Exception
	{
		File[] files = new File(dir).listFiles();
		if (files == null)
		{
			// listFiles() returns null instead of throwing; fail with a
			// message that names the offending path.
			throw new FileNotFoundException(
					"Cannot list files in directory: " + dir);
		}
		for (File f : files)
		{
			if (!f.isDirectory() && !f.isHidden() && f.exists() && f.canRead()
					&& (filter == null || filter.accept(f)))
			{
				indexFile(f);
			}
		}
		return writer.numDocs();
	}

	/**
	 * Logs the file's canonical path and adds its document to the index.
	 *
	 * @param f file to index
	 * @throws Exception if the file cannot be read or added
	 */
	private void indexFile(File f) throws Exception
	{
		System.out.println("Indexing " + f.getCanonicalPath());
		Document doc = getDocument(f);
		writer.addDocument(doc);

	}

	/**
	 * Builds the Lucene document for one file.
	 *
	 * @param f file to convert
	 * @return document with the "contents", "filename" and "fullpath" fields
	 * @throws Exception if the file cannot be read
	 */
	private Document getDocument(File f) throws Exception
	{
		Document doc = new Document();
		// Full text: analyzed for searching and stored so it can be printed
		// back from search results.
		doc.add(new Field("contents", File2Reader(f), Field.Store.YES,
				Field.Index.ANALYZED));
		doc.add(new Field("filename", f.getName(), Field.Store.YES,
				Field.Index.NOT_ANALYZED));
		doc.add(new Field("fullpath", f.getCanonicalPath(), Field.Store.YES,
				Field.Index.NOT_ANALYZED));

		return doc;
	}

	/**
	 * Reads a whole file into a string, terminating every line with "\n".
	 * The file is decoded with the platform default charset.
	 *
	 * @param f file to read
	 * @return the file's text, one "\n" after each line
	 * @throws Exception if the file cannot be opened or read
	 */
	public static String File2Reader(File f) throws Exception
	{
		BufferedReader bf = new BufferedReader(new InputStreamReader(
				new FileInputStream(f)));
		try
		{
			StringBuilder sb = new StringBuilder();
			String line;
			while ((line = bf.readLine()) != null)
			{
				sb.append(line).append("\n");
			}
			return sb.toString();
		}
		finally
		{
			// Release the file handle even if reading fails part-way.
			bf.close();
		}
	}

	/** File filter that accepts only names ending in ".txt" (any case). */
	private static class TextFilesFilter implements FileFilter
	{
		@Override
		public boolean accept(File pathname)
		{

			return pathname.getName().toLowerCase().endsWith(".txt");
		}
	}
}

package com.yale.lucene;

import java.io.File;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/**
 * Runs a query against the "contents" field of an existing Lucene index and
 * prints the stored fields of the best matches.
 */
public class Searcher
{
	/**
	 * Searches a hard-coded index directory for a hard-coded query string.
	 *
	 * @param args ignored
	 * @throws Exception if the index cannot be opened or the query fails
	 */
	public static void main(String[] args) throws Exception
	{
		// Directory that holds the index
		String indexDir = "F://新建文件夹//luceneTest//indexFile";
		// Text to search for
		String queryString = "Apache";
		search(indexDir, queryString);
	}

	/**
	 * Parses {@code queryString} against the "contents" field, requests the
	 * top 10 hits, and prints each hit's "fullpath", "filename" and
	 * "contents" fields. The hit count and elapsed search time are written to
	 * standard error.
	 *
	 * @param indexDir    path of the directory that holds the index
	 * @param queryString query text in QueryParser syntax
	 * @throws Exception if the index cannot be opened, the query cannot be
	 *                   parsed, or the search fails
	 */
	public static void search(String indexDir, String queryString)
			throws Exception
	{
		Directory dir = FSDirectory.open(new File(indexDir));
		try
		{
			IndexSearcher is = new IndexSearcher(dir);
			try
			{
				QueryParser parser = new QueryParser(Version.LUCENE_30,
						"contents", new StandardAnalyzer(Version.LUCENE_30));

				Query query = parser.parse(queryString);

				long start = System.currentTimeMillis();
				TopDocs hits = is.search(query, 10);
				long end = System.currentTimeMillis();
				System.err.println("找到   " + hits.totalHits + "个文件 在" + (end - start)
						+ "毫秒匹配 要查询的字符串	'" + queryString + "'");

				for (ScoreDoc scoreDoc : hits.scoreDocs)
				{
					Document doc = is.doc(scoreDoc.doc);

					System.out.println(doc.get("fullpath"));

					System.out.println(doc.get("filename"));

					System.out.println(doc.get("contents"));
				}
			}
			finally
			{
				// Close the searcher even when parsing or searching throws.
				is.close();
			}
		}
		finally
		{
			// The directory was opened here, so it is released here as well.
			dir.close();
		}
	}
}

分享到：

lucene入门代码二（TermQuery） | 老师说struts2文件上传而已，你看下面。

2012-06-01 01:33
浏览 1881
评论(1)
分类:编程语言
查看更多

1 楼 object_object 2012-07-02

不错，学习了。。。

发表评论

您还没有登录,请您登录后再发表评论

最近访客更多访客>>

博主相关

文章分类

社区版块

存档分类

最新评论

lucene入门代码一

评论

发表评论

相关推荐

最近访客 更多访客>>

博主相关

文章分类

社区版块

存档分类

最新评论

lucene入门代码一

评论

发表评论

相关推荐

Solr 4.10.2整合IKAnalyzer 2012FF_hf1 智能分析与细粒度分析配置useSmart

lucene入门代码六（使用FastVectorHighlighter高亮）

lucene入门代码五（在搜索结果中使用高亮）

lucene入门代码四（使用css高亮文本）

lucene入门代码三（near-real-time）

lucene入门代码二（TermQuery）

最近访客更多访客>>