`
lizhensan
  • 浏览: 378729 次
  • 性别: Icon_minigender_1
  • 来自: 深圳
社区版块
存档分类
最新评论

lucene学习

    博客分类:
  • java
 
阅读更多

创建索引

package org.apache.lucene.demo;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Date;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericField;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

public class IndexFiles {
	public static void main(String[] args) {

		//索引文件所在的目录
		String indexPath = "c:/index";
		//需要索引的文件目录
		String docsPath = "c:/docs";
		//是否创建
		boolean create = true;

		
		File docDir = new File(docsPath);

		Date start = new Date();
		try {
			System.out.println("Indexing to directory '" + indexPath + "'...");

			
			Directory dir = FSDirectory.open(new File(indexPath));
			Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_31);
			IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_31, analyzer);

			if (create) {
				iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
			} else {
				iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
			}

			IndexWriter writer = new IndexWriter(dir, iwc);
			//创建索引文件
			indexDocs(writer, docDir);

			writer.close();

			Date end = new Date();
			System.out.println(end.getTime() - start.getTime() + " total milliseconds");
		} catch (IOException e) {
			System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
		}
	}

	static void indexDocs(IndexWriter writer, File file) throws IOException {
		if (file.canRead())
			if (file.isDirectory()) {
				String[] files = file.list();

				if (files != null)
					for (int i = 0; i < files.length; i++)
						indexDocs(writer, new File(file, files[i]));
			} else {
				FileInputStream fis;
				try {
					fis = new FileInputStream(file);
				} catch (FileNotFoundException fnfe) {
					return;
				}

				try {
					//Document 代表索引的一条数据
					Document doc = new Document();

					Field pathField = new Field("path", file.getPath(), Field.Store.YES,
							Field.Index.NOT_ANALYZED_NO_NORMS);
					pathField.setIndexOptions(FieldInfo.IndexOptions.DOCS_ONLY);
					doc.add(pathField);

					NumericField modifiedField = new NumericField("modified");
					modifiedField.setLongValue(file.lastModified());
					doc.add(modifiedField);

					doc.add(new Field("contents", new BufferedReader(new InputStreamReader(fis,
							"UTF-8"))));

					if (writer.getConfig().getOpenMode() == IndexWriterConfig.OpenMode.CREATE) {
						System.out.println("adding " + file);
						writer.addDocument(doc);
					} else {
						System.out.println("updating " + file);
						writer.updateDocument(new Term("path", file.getPath()), doc);
					}
				} finally {
					fis.close();
				}
			}
	}
}

 

利用索引查找文件

package org.apache.lucene.demo;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.PrintStream;
import java.util.Date;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

public class SearchFiles {
	public static void main(String[] args) throws Exception {

		String index = "c:/index";
		String field = "contents";
		String queries = null;
		int repeat = 0;
		boolean raw = false;
		String queryString = null;
		int hitsPerPage = 10;


		IndexReader reader = IndexReader.open(FSDirectory.open(new File(index)));
		IndexSearcher searcher = new IndexSearcher(reader);
		Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_31);

		BufferedReader in = null;
		if (queries != null)
			in = new BufferedReader(new InputStreamReader(new FileInputStream(queries), "UTF-8"));
		else {
			in = new BufferedReader(new InputStreamReader(System.in, "UTF-8"));
		}
		QueryParser parser = new QueryParser(Version.LUCENE_31, field, analyzer);
		while (true) {
			if ((queries == null) && (queryString == null)) {
				System.out.println("Enter query: ");
			}

			String line = queryString != null ? queryString : in.readLine();

			if ((line == null) || (line.length() == -1)) {
				break;
			}
			line = line.trim();
			if (line.length() == 0) {
				break;
			}
			Query query = parser.parse(line);
			System.out.println("Searching for: " + query.toString(field));

			if (repeat > 0) {
				Date start = new Date();
				for (int i = 0; i < repeat; i++) {
					searcher.search(query, null, 100);
				}
				Date end = new Date();
				System.out.println("Time: " + (end.getTime() - start.getTime()) + "ms");
			}

			doPagingSearch(in, searcher, query, hitsPerPage, raw, (queries == null)
					&& (queryString == null));

			if (queryString != null) {
				break;
			}
		}
		searcher.close();
		reader.close();
	}

	public static void doPagingSearch(BufferedReader in, IndexSearcher searcher, Query query,
			int hitsPerPage, boolean raw, boolean interactive) throws IOException {
		TopDocs results = searcher.search(query, 5 * hitsPerPage);
		ScoreDoc[] hits = results.scoreDocs;

		int numTotalHits = results.totalHits;
		System.out.println(numTotalHits + " total matching documents");

		int start = 0;
		int end = Math.min(numTotalHits, hitsPerPage);
		while (true) {
			if (end > hits.length) {
				System.out.println("Only results 1 - " + hits.length + " of " + numTotalHits
						+ " total matching documents collected.");
				System.out.println("Collect more (y/n) ?");
				String line = in.readLine();
				if ((line.length() == 0) || (line.charAt(0) == 'n')) {
					break;
				}
				hits = searcher.search(query, numTotalHits).scoreDocs;
			}

			end = Math.min(hits.length, start + hitsPerPage);

			for (int i = start; i < end; i++) {
				if (raw) {
					System.out.println("doc=" + hits[i].doc + " score=" + hits[i].score);
				} else {
					System.out.println("doc=" + hits[i].doc + " score=" + hits[i].score);

					Document doc = searcher.doc(hits[i].doc);
					String path = doc.get("path");
					if (path != null) {
						System.out.println(i + 1 + ". --" + path);
						String title = doc.get("title");
						System.out.println(doc.get("contents"));
						if (title != null)
							System.out.println("   Title: " + doc.get("title"));
					} else {
						System.out.println(i + 1 + ". " + "No path for this document");
					}
				}
			}

			if ((!interactive) || (end == 0)) {
				break;
			}
			if (numTotalHits >= end) {
				boolean quit = false;
				while (true) {
					System.out.print("Press ");
					if (start - hitsPerPage >= 0) {
						System.out.print("(p)revious page, ");
					}
					if (start + hitsPerPage < numTotalHits) {
						System.out.print("(n)ext page, ");
					}
					System.out.println("(q)uit or enter number to jump to a page.");

					String line = in.readLine();
					if ((line.length() == 0) || (line.charAt(0) == 'q')) {
						quit = true;
						break;
					}
					if (line.charAt(0) == 'p') {
						start = Math.max(0, start - hitsPerPage);
						break;
					}
					if (line.charAt(0) == 'n') {
						if (start + hitsPerPage >= numTotalHits)
							break;
						start += hitsPerPage;
						break;
					}

					int page = Integer.parseInt(line);
					if ((page - 1) * hitsPerPage < numTotalHits) {
						start = (page - 1) * hitsPerPage;
						break;
					}
					System.out.println("No such page");
				}

				if (quit)
					break;
				end = Math.min(numTotalHits, start + hitsPerPage);
			}
		}
	}
}
 
分享到:
评论

相关推荐

    lucene学习资料收集

    【标题】:“Lucene学习资料收集” 【描述】:Lucene是一个开源的全文搜索引擎库,由Apache软件基金会开发。这个资料集可能包含了关于如何理解和使用Lucene的各种资源,特别是通过博主huanglz19871030在iteye上的...

    lucene学习资料

    《Lucene学习资料》 Lucene是一个开源的全文搜索引擎库,由Apache软件基金会维护。它提供了高级的文本分析和索引功能,使得开发者能够轻松地在应用程序中集成强大的搜索功能。这个资料包中的《Lucene in Action_2nd...

    Lucene学习源码.rar

    本文将主要围绕Java Lucene进行深入探讨,并基于提供的“Lucene学习源码.rar”文件中的“Lucene视频教程_讲解部分源码”展开讨论。 一、Lucene核心概念 1. 文档(Document):Lucene中的基本单位,用于存储待检索...

    lucene学习pdf2

    "lucene学习pdf2" 提供的文档,无疑是对Lucene深入理解的一把钥匙,它涵盖了Lucene的核心概念、操作流程以及高级特性。 首先,Lucene的基础知识是必不可少的。Lucene的核心在于索引和搜索,它将非结构化的文本数据...

    lucene学习-02

    【标题】:“Lucene学习-02” 在深入探讨“Lucene学习-02”这一主题之前,我们先来理解一下Lucene的核心概念。Lucene是一个高性能、全文本搜索库,由Apache软件基金会开发,广泛应用于各种搜索引擎和信息检索系统。...

    Lucene的的学习资料及案例

    **Lucene学习指南** Lucene是一个高性能、全文检索库,由Apache软件基金会开发并维护,是Java编程语言中广泛使用的搜索引擎库。它提供了一个简单的API,使得开发者能够方便地在应用中实现全文检索功能。本篇文章将...

    【大搜集:lucene学习资料】---<下载不扣分,回帖加1分,欢迎下载,童叟无欺>

    lucene学习笔记 1 .txt lucene学习笔记 2.txt lucene学习笔记 3 .txt lucene入门实战.txt Lucene 的学习 .txt Lucene-2.0学习文档 .txt Lucene入门与使用 .txt lucene性能.txt 大富翁全文索引和查询的例子...

    Lucene学习工具包.zip

    **Lucene学习工具包** Lucene是一个开源的全文搜索引擎库,由Apache软件基金会开发并维护。这个"Lucene学习工具包.zip"包含了学习Lucene所需的重要资料和资源,旨在帮助开发者深入理解和掌握Lucene的核心概念、功能...

    Lucene学习例子与文档

    **Lucene学习例子与文档详解** Lucene是一个高性能、全文本搜索库,由Apache软件基金会开发,它提供了完整的搜索功能,包括索引、查询、排序等。Lucene被广泛应用于各种需要全文检索的项目中,如网站、文档管理、...

    lucene学习入门程序

    **Lucene学习入门程序** Lucene是一个开源的全文搜索引擎库,由Apache软件基金会开发并维护。它是Java编写,可以被集成到各种应用中,提供强大的文本检索功能。本程序是针对初学者设计的,旨在帮助开发者快速理解并...

    lucene学习全方面剖析总结

    ### Lucene 学习全方面剖析总结 #### Lucene 原理与应用概述 Lucene 是一个高性能、全文检索的开源库,被广泛应用于各种搜索引擎的开发之中。本篇文章旨在全面剖析 Lucene 的核心技术和应用场景,帮助读者深入理解...

    Lucene 学习笔记 1

    **Lucene 学习笔记 1** Lucene 是一个全文搜索引擎库,由 Apache 软件基金会开发。它提供了一个可扩展的、高性能的搜索框架,使得开发者能够在其应用程序中集成高级的搜索功能。本篇学习笔记将深入探讨 Lucene 的...

    Lucene学习笔记.doc

    【Lucene学习笔记】 Lucene是一个高性能、全文检索的开源库,完全用Java编写,能够帮助开发者在应用程序中实现复杂的搜索引擎功能。它提供了强大的索引和搜索机制,使得从大量文本数据中快速找到相关信息变得简单。...

    lucene学习笔记

    标题:Lucene学习笔记 描述:Lucene学习笔记,Lucene入门必备材料 知识点: 一、Lucene概述与文档管理策略 Lucene是一款高性能、全功能的文本搜索引擎库,广泛应用于文档检索、全文搜索等场景。为了提升搜索效率...

Global site tag (gtag.js) - Google Analytics