Lucene4.6 学习 001 -

zhangzhanlei1988

浏览: 41282 次
性别:
来自: 北京

最近访客更多访客>>

mikin840815

yin1003003

dongqdong

奥脑子

博主相关

博客

微博

相册

留言

关于我

文章分类

社区版块

存档分类

Lucene4.6 学习 001

博客分类：

lucene

Lucene4.6 学习

package com.zhangzhanlei.lucene;

import java.io.BufferedReader;

import java.io.File;

import java.io.FileInputStream;

import java.io.IOException;

import java.io.InputStreamReader;

import org.apache.lucene.analysis.Analyzer;

import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;

import org.apache.lucene.document.Document;

import org.apache.lucene.document.Field;

import org.apache.lucene.index.IndexWriter;

import org.apache.lucene.index.IndexWriterConfig;

import org.apache.lucene.store.Directory;

import org.apache.lucene.store.FSDirectory;

import org.apache.lucene.util.Version;

public class TestIndexer

{

private String fieldName;

private String endStr;

public TestIndexer(String fieldName,String endStr)

{

this.fieldName = fieldName;

this.endStr = endStr;

}

/**

* lucene 索引创建主方法

* @param indexDir

* @param dataDir

* @return

* @throws IOException

public int index(File indexDir,File dataDir) throws IOException

{

if(!dataDir.exists()||!dataDir.isDirectory())

{

throw new IOException(dataDir+":does not exist or is not a directory");

}

Analyzer analyzer = new SmartChineseAnalyzer(Version.LUCENE_46,true);

IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_46,analyzer);

Directory directory = FSDirectory.open(indexDir);

if(IndexWriter.isLocked(directory))

{

IndexWriter.unlock(directory);

}

IndexWriter writer = new IndexWriter(directory,indexWriterConfig);

writer.deleteAll();

indexDirectory(writer,dataDir);

int numIndexed = writer.numDocs();

writer.close();

return numIndexed;

}

public void indexDirectory(IndexWriter writer,File dir) throws IOException

{

File [] fiels = dir.listFiles();

for(File file : fiels)

{

if(file.isDirectory())

{

indexDirectory(writer,file);

}

else if (file.getName().endsWith(this.endStr))

{

indexFile(writer,file);

}

/**

* 对文件创建索引

* @param writer

* @param f

* @throws IOException

public void indexFile(IndexWriter writer,File f) throws IOException

{

if(f.isHidden()||!f.exists()||!f.canRead())

{

return;

}

System.out.println("Indexing: "+f.getCanonicalPath());

getTXT (writer,f,"GBK");

}

/***

* 读取文件,为单行加入索引

* @param file

* @param charset

* @return

* @throws IOException

public void getTXT (IndexWriter writer,File file,String charset) throws IOException

{

FileInputStream fileInputStream = new FileInputStream(file);

InputStreamReader inputStreamReader = new InputStreamReader(fileInputStream,charset);

BufferedReader reader = new BufferedReader(inputStreamReader);

String line = new String();

while((line=reader.readLine())!=null)

{

Document doc = new Document();

doc.add(new Field("line",line,Field.Store.YES,Field.Index.ANALYZED));

writer.addDocument(doc);

}

reader.close();

}

/**

* @param args

public static void main(String[] args)

{

TestIndexer indexer = new TestIndexer("filepath",".txt");

try

{

File indexDir = new File ("d:\\lucenetest\\index");

File dataDir = new File ("d:\\lucenetest\\file");

int result = indexer.index(indexDir, dataDir);

System.out.println("indexing : " +result + " files.");

}

catch (Exception e)

{

e.printStackTrace();

}

Indexing: D:\lucenetest\file\bwpf814.txt

Indexing: D:\lucenetest\file\bwpf815.txt

Indexing: D:\lucenetest\file\bwpf816.txt

Indexing: D:\lucenetest\file\bwpf817.txt

Indexing: D:\lucenetest\file\bwpf818.txt

indexing : 45550 files.

package com.zhangzhanlei.lucene;

import java.io.File;

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;

import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;

import org.apache.lucene.document.Document;

import org.apache.lucene.index.IndexReader;

import org.apache.lucene.queryparser.classic.ParseException;

import org.apache.lucene.queryparser.classic.QueryParser;

import org.apache.lucene.search.IndexSearcher;

import org.apache.lucene.search.Query;

import org.apache.lucene.search.ScoreDoc;

import org.apache.lucene.search.TopDocs;

import org.apache.lucene.store.Directory;

import org.apache.lucene.store.FSDirectory;

import org.apache.lucene.util.Version;

public class TestSearcher

{

private File indexDir;

private String fieldName;

public TestSearcher (File indexDir,String fieldName)

{

this.indexDir = indexDir;

this.fieldName = fieldName;

}

public void searcher(String keywords) throws IOException, ParseException

{

Directory fsDir = FSDirectory.open(indexDir);

IndexReader reader = IndexReader.open(fsDir);

IndexSearcher is = new IndexSearcher(reader);

Analyzer analyzer = new SmartChineseAnalyzer(Version.LUCENE_46,true);

QueryParser queryParser = new QueryParser(Version.LUCENE_46,fieldName,analyzer);

Query query = queryParser.parse(keywords);

TopDocs docs = is.search(query, 1000);

ScoreDoc [] scoreDoc = docs.scoreDocs;

System.out.println("Found "+docs.totalHits+" documents that matched query '"+keywords +"'");

for(int i = 0 ;i<scoreDoc.length;i++)

{

Document miDoc = reader.document(scoreDoc[i].doc);

System.out.println(miDoc.get(fieldName));

}

reader.close();

}

/**

* @param args

public static void main(String[] args)

{

TestSearcher searcher = new TestSearcher(new File("d:\\lucenetest\\index"),"line");

try

{

searcher.searcher("查询超时");

}

catch(Exception e)

{

e.printStackTrace();

}

Found 750 documents that matched query '查询超时'

Caused by: com.microsoft.sqlserver.jdbc.SQLServerException: 查询超时。

分享到：

Lucene 概念总结 | 程序员面试什么最重要

2014-01-22 10:40
浏览 721
评论(0)
分类:编程语言
查看更多

发表评论

您还没有登录,请您登录后再发表评论

最近访客更多访客>>

博主相关

文章分类

社区版块

存档分类

最新评论

Lucene4.6 学习 001

评论

发表评论

相关推荐

最近访客 更多访客>>

博主相关

文章分类

社区版块

存档分类

最新评论

Lucene4.6 学习 001

评论

发表评论

相关推荐

MultiFieldQueryParser 使用方法

Lucene 学习 003

Lucene 学习 002

Lucene 概念总结

Lucene 的索引性能如何优化

最近访客更多访客>>