lucene学习

lizhensan

浏览: 383575 次
性别:
来自: 深圳

最近访客 更多访客>>

772191140

mlstd2012

webeasymail

rubskin

博主相关

博客

微博

相册

留言

关于我

文章分类

社区版块

存档分类

博客分类：

java

创建索引

package org.apache.lucene.demo;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Date;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericField;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/**
 * Command-line demo that builds a Lucene index from every readable file
 * found under a documents directory.
 *
 * <p>Each indexed file becomes one {@link Document} with three fields:
 * <ul>
 *   <li>{@code path} - the file path, stored and indexed as a single token;</li>
 *   <li>{@code modified} - the file's last-modified time as a numeric field;</li>
 *   <li>{@code contents} - the file body, read as UTF-8, tokenized and indexed
 *       from a reader.</li>
 * </ul>
 */
public class IndexFiles {

	/**
	 * Indexes the hard-coded documents directory into the hard-coded index
	 * directory and prints the elapsed time.
	 *
	 * @param args ignored; all settings are hard-coded below
	 */
	public static void main(String[] args) {

		// Directory in which the index files are written.
		String indexPath = "c:/index";
		// Directory containing the files to index.
		String docsPath = "c:/docs";
		// true: build a fresh index (replacing any existing one);
		// false: add to / update an existing index.
		boolean create = true;

		File docDir = new File(docsPath);
		// Validate the input before opening the writer: in CREATE mode opening
		// the writer replaces the existing index, so a mistyped documents path
		// would otherwise silently leave an empty index behind.
		if (!docDir.exists() || !docDir.canRead()) {
			System.out.println("Document directory '" + docDir.getAbsolutePath()
					+ "' does not exist or is not readable, please check the path");
			return;
		}

		Date start = new Date();
		try {
			System.out.println("Indexing to directory '" + indexPath + "'...");

			Directory dir = FSDirectory.open(new File(indexPath));
			try {
				Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_31);
				IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_31, analyzer);

				if (create) {
					iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
				} else {
					iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
				}

				IndexWriter writer = new IndexWriter(dir, iwc);
				try {
					// Walk the documents directory and add every file to the index.
					indexDocs(writer, docDir);
				} finally {
					// Always close the writer, even when indexing fails part-way,
					// so it is not left open holding the index.
					writer.close();
				}
			} finally {
				dir.close();
			}

			Date end = new Date();
			System.out.println(end.getTime() - start.getTime() + " total milliseconds");
		} catch (IOException e) {
			System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
		}
	}

	/**
	 * Indexes {@code file} with {@code writer}. If {@code file} is a directory
	 * its entries are indexed recursively; unreadable files and directories
	 * are skipped.
	 *
	 * @param writer writer for the index the documents are added to
	 * @param file   file or directory to index
	 * @throws IOException if reading a file or writing to the index fails
	 */
	static void indexDocs(IndexWriter writer, File file) throws IOException {
		if (!file.canRead()) {
			return;
		}

		if (file.isDirectory()) {
			String[] files = file.list();
			// list() returns null when the directory cannot be listed.
			if (files != null) {
				for (int i = 0; i < files.length; i++) {
					indexDocs(writer, new File(file, files[i]));
				}
			}
			return;
		}

		FileInputStream fis;
		try {
			fis = new FileInputStream(file);
		} catch (FileNotFoundException fnfe) {
			// Deliberate best-effort: a file that cannot be opened is skipped
			// so that one bad file does not abort the whole indexing run.
			return;
		}

		try {
			// A Document represents one entry (record) in the index.
			Document doc = new Document();

			// Stored, indexed as one un-analyzed token; only document ids are
			// recorded for it (no term frequencies or positions).
			Field pathField = new Field("path", file.getPath(), Field.Store.YES,
					Field.Index.NOT_ANALYZED_NO_NORMS);
			pathField.setIndexOptions(FieldInfo.IndexOptions.DOCS_ONLY);
			doc.add(pathField);

			NumericField modifiedField = new NumericField("modified");
			modifiedField.setLongValue(file.lastModified());
			doc.add(modifiedField);

			// Reader-valued field: the text is tokenized and indexed; the file
			// is decoded as UTF-8.
			doc.add(new Field("contents", new BufferedReader(new InputStreamReader(fis,
					"UTF-8"))));

			if (writer.getConfig().getOpenMode() == IndexWriterConfig.OpenMode.CREATE) {
				// Fresh index: no older copy of this document can exist.
				System.out.println("adding " + file);
				writer.addDocument(doc);
			} else {
				// Existing index: replace any document with the same path.
				System.out.println("updating " + file);
				writer.updateDocument(new Term("path", file.getPath()), doc);
			}
		} finally {
			fis.close();
		}
	}
}

利用索引查找文件

package org.apache.lucene.demo;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.PrintStream;
import java.util.Date;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/**
 * Command-line demo that searches the index built by the indexing demo and
 * pages through the results on the console.
 */
public class SearchFiles {

	/**
	 * Opens the hard-coded index, reads queries (from standard input unless a
	 * query file or a fixed query string is configured below) and prints the
	 * matching documents page by page. An empty line or end of input ends the
	 * session.
	 *
	 * @param args ignored; all settings are hard-coded below
	 * @throws Exception if the index cannot be read or a query cannot be parsed
	 */
	public static void main(String[] args) throws Exception {

		String index = "c:/index";
		String field = "contents";
		String queries = null;
		int repeat = 0;
		boolean raw = false;
		String queryString = null;
		int hitsPerPage = 10;

		IndexReader reader = IndexReader.open(FSDirectory.open(new File(index)));
		IndexSearcher searcher = null;
		try {
			searcher = new IndexSearcher(reader);
			Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_31);

			BufferedReader in = null;
			if (queries != null) {
				in = new BufferedReader(new InputStreamReader(new FileInputStream(queries), "UTF-8"));
			} else {
				in = new BufferedReader(new InputStreamReader(System.in, "UTF-8"));
			}
			QueryParser parser = new QueryParser(Version.LUCENE_31, field, analyzer);
			while (true) {
				if ((queries == null) && (queryString == null)) {
					System.out.println("Enter query: ");
				}

				String line = queryString != null ? queryString : in.readLine();

				// null means end of input.
				if (line == null) {
					break;
				}
				line = line.trim();
				if (line.length() == 0) {
					break;
				}
				Query query = parser.parse(line);
				System.out.println("Searching for: " + query.toString(field));

				if (repeat > 0) {
					// Crude benchmark: run the same query 'repeat' times and time it.
					Date start = new Date();
					for (int i = 0; i < repeat; i++) {
						searcher.search(query, null, 100);
					}
					Date end = new Date();
					System.out.println("Time: " + (end.getTime() - start.getTime()) + "ms");
				}

				doPagingSearch(in, searcher, query, hitsPerPage, raw, (queries == null)
						&& (queryString == null));

				// A fixed query string is searched exactly once.
				if (queryString != null) {
					break;
				}
			}
		} finally {
			// Release the searcher and reader even when parsing or searching fails.
			try {
				if (searcher != null) {
					searcher.close();
				}
			} finally {
				reader.close();
			}
		}
	}

	/**
	 * Runs {@code query} and prints the hits one page at a time.
	 *
	 * <p>Initially only the top {@code 5 * hitsPerPage} hits are collected. When
	 * the user pages past them, they are asked whether all hits should be
	 * collected. In interactive mode the user can move to the previous or next
	 * page, jump to a page number, or quit; end of input is treated as quit.
	 * In non-interactive mode only the first page is printed.
	 *
	 * @param in          reader the user's paging commands are read from
	 * @param searcher    searcher used to run the query and load documents
	 * @param query       query to execute
	 * @param hitsPerPage number of hits shown per page
	 * @param raw         if true, print only document id and score for each hit
	 * @param interactive if true, prompt for paging commands after each page
	 * @throws IOException if reading input or accessing the index fails
	 */
	public static void doPagingSearch(BufferedReader in, IndexSearcher searcher, Query query,
			int hitsPerPage, boolean raw, boolean interactive) throws IOException {
		TopDocs results = searcher.search(query, 5 * hitsPerPage);
		ScoreDoc[] hits = results.scoreDocs;

		int numTotalHits = results.totalHits;
		System.out.println(numTotalHits + " total matching documents");

		int start = 0;
		int end = Math.min(numTotalHits, hitsPerPage);
		while (true) {
			if (end > hits.length) {
				// The requested page lies beyond the hits collected so far.
				System.out.println("Only results 1 - " + hits.length + " of " + numTotalHits
						+ " total matching documents collected.");
				System.out.println("Collect more (y/n) ?");
				String line = in.readLine();
				// End of input (null) is treated like "no".
				if ((line == null) || (line.length() == 0) || (line.charAt(0) == 'n')) {
					break;
				}
				hits = searcher.search(query, numTotalHits).scoreDocs;
			}

			end = Math.min(hits.length, start + hitsPerPage);

			for (int i = start; i < end; i++) {
				System.out.println("doc=" + hits[i].doc + " score=" + hits[i].score);
				if (!raw) {
					printStoredFields(searcher, hits[i].doc, i + 1);
				}
			}

			if ((!interactive) || (end == 0)) {
				break;
			}
			if (numTotalHits >= end) {
				boolean quit = false;
				while (true) {
					System.out.print("Press ");
					if (start - hitsPerPage >= 0) {
						System.out.print("(p)revious page, ");
					}
					if (start + hitsPerPage < numTotalHits) {
						System.out.print("(n)ext page, ");
					}
					System.out.println("(q)uit or enter number to jump to a page.");

					String line = in.readLine();
					// End of input (null) is treated like "quit".
					if ((line == null) || (line.length() == 0) || (line.charAt(0) == 'q')) {
						quit = true;
						break;
					}
					if (line.charAt(0) == 'p') {
						start = Math.max(0, start - hitsPerPage);
						break;
					}
					if (line.charAt(0) == 'n') {
						if (start + hitsPerPage < numTotalHits) {
							start += hitsPerPage;
						}
						break;
					}

					int page;
					try {
						page = Integer.parseInt(line.trim());
					} catch (NumberFormatException e) {
						// Not a command and not a number: ask again instead of crashing.
						System.out.println("No such page");
						continue;
					}
					// Pages are 1-based; page <= 0 would yield a negative start index.
					// The offset is computed as a long so huge page numbers cannot
					// overflow into a seemingly valid value.
					long offset = (long) (page - 1) * hitsPerPage;
					if ((page >= 1) && (offset < numTotalHits)) {
						start = (int) offset;
						break;
					}
					System.out.println("No such page");
				}

				if (quit) {
					break;
				}
				end = Math.min(numTotalHits, start + hitsPerPage);
			}
		}
	}

	/**
	 * Prints the stored {@code path}, {@code contents} and {@code title} fields
	 * of one hit.
	 *
	 * @param searcher searcher used to load the stored document
	 * @param docId    index-internal id of the document
	 * @param rank     1-based position of the hit in the result list
	 * @throws IOException if the document cannot be loaded from the index
	 */
	private static void printStoredFields(IndexSearcher searcher, int docId, int rank)
			throws IOException {
		Document doc = searcher.doc(docId);
		String path = doc.get("path");
		if (path == null) {
			System.out.println(rank + ". " + "No path for this document");
			return;
		}

		System.out.println(rank + ". --" + path);
		// Only stored fields can be retrieved; skip the line rather than
		// printing the literal "null" when "contents" was not stored.
		String contents = doc.get("contents");
		if (contents != null) {
			System.out.println(contents);
		}
		String title = doc.get("title");
		if (title != null) {
			System.out.println("   Title: " + title);
		}
	}
}

分享到：

ubuntu-jdk的安装 | Eclipse插件Ant里运用ftp遇到的问题

2012-05-08 21:34
浏览 1493
评论(0)
分类:编程语言
查看更多

发表评论

您还没有登录,请您登录后再发表评论

最近访客更多访客>>

博主相关

文章分类

社区版块

存档分类

最新评论

lucene学习

评论

发表评论

相关推荐

最近访客 更多访客>>

博主相关

文章分类

社区版块

存档分类

最新评论

lucene学习

评论

发表评论

相关推荐

java RMI 线程池的配置

apache cli 例子

jeromq 例子

Java socket 通讯流的测试

Java 杀掉线程

socket 长连接 模拟

JVM的退出

Java JMM

Corba 了解

java JVM 1

Java 自带的辅助工具

Java 异常

java中的两个关键字 volatile & transient

Java 代理 对一批方法 的前后进行特殊处理

正则表达式 这些问题你是否明白？

dll库的制作及Java jni的调用

为什么使用rt.jar的内部类，在eclipse运行没问题，而在mvn编译的时候失败呢

javadoc 注释

JDK自带的多线程API的简单例子

Apache Commons CLI 简单例子

最近访客更多访客>>

socket 长连接模拟

Java 代理对一批方法的前后进行特殊处理

正则表达式这些问题你是否明白？