1、准备工作
下载lucene 3.6.1 : http://lucene.apache.org/
下载中文分词IK Analyzer: http://code.google.com/p/ik-analyzer/downloads/list (注意下载的是IK Analyzer 2012_u5_source.zip,其他版本有bug)
下载solr 3.6.1: http://lucene.apache.org/solr/(编译IK Analyzer时需引用包)
OK,将lucene 、solr 相关包(lucene-core-3.6.1.jar、lucene-highlighter-3.6.1.jar、lucene-analyzers-3.6.1.jar、apache-solr-core-3.6.1.jar、apache-solr-solrj-3.6.1.jar)拷贝到项目lib下,IK源码置于项目src下。
2、从Oracle数据库中取数据创建索引(使用IK分词)
003 |
import org.apache.lucene.index.IndexWriter;
|
004 |
import org.apache.lucene.index.IndexWriterConfig;
|
005 |
import org.apache.lucene.index.CorruptIndexException;
|
006 |
import org.apache.lucene.store.FSDirectory;
|
007 |
import org.apache.lucene.store.Directory;
|
008 |
import org.apache.lucene.analysis.Analyzer;
|
009 |
import org.apache.lucene.analysis.standard.StandardAnalyzer;
|
010 |
import org.apache.lucene.util.Version;
|
011 |
import org.apache.lucene.document.Document;
|
012 |
import org.apache.lucene.document.Field;
|
013 |
import org.wltea.analyzer.lucene.IKAnalyzer;
|
015 |
import java.sql.Connection;
|
017 |
import java.io.IOException;
|
018 |
import java.util.ArrayList;
|
019 |
import java.util.Date;
|
021 |
import modules.gk.Gk_info;
|
022 |
import modules.gk.Gk_infoSub;
|
023 |
import web.sys.Globals;
|
024 |
import web.db.DBConnector;
|
025 |
import web.db.ObjectCtl;
|
026 |
import web.util.StringUtil;
|
028 |
public class LuceneIndex {
|
029 |
IndexWriter writer = null ;
|
030 |
FSDirectory dir = null ;
|
031 |
boolean create = true ;
|
034 |
long a1 = System.currentTimeMillis();
|
035 |
System.out.println( "[Lucene 开始执行:" + new Date() + "]" );
|
036 |
Connection con = DBConnector.getconecttion();
|
038 |
final File docDir = new File(Globals.SYS_COM_CONFIG.get( "sys.index.path" ).toString());
|
039 |
if (!docDir.exists()) {
|
042 |
String cr = Globals.SYS_COM_CONFIG.get( "sys.index.create" ).toString();
|
043 |
if ( "false" .equals(cr.toLowerCase())) {
|
046 |
Directory dir = FSDirectory.open(docDir);
|
048 |
Analyzer analyzer = new IKAnalyzer( true );
|
049 |
IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_36, analyzer);
|
053 |
iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
|
056 |
iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
|
058 |
IndexWriter writer = new IndexWriter(dir, iwc);
|
059 |
String sql = "SELECT indexno,title,describes,pdate,keywords FROM TABLEA WHERE STATE=1 AND SSTAG<>1 " ;
|
060 |
int rowCount = ObjectCtl.getRowCount(con, sql);
|
061 |
int pageSize = StringUtil.StringToInt(Globals.SYS_COM_CONFIG.get( "sys.index.size" ).toString());
|
062 |
int pages = (rowCount - 1 ) / pageSize + 1 ;
|
063 |
ArrayList list = null ;
|
064 |
Gk_infoSub gk = null ;
|
065 |
for ( int i = 1 ; i < pages+ 1 ; i++) {
|
066 |
long a = System.currentTimeMillis();
|
067 |
list = ObjectCtl.listPage(con, sql, i, pageSize, new Gk_infoSub());
|
068 |
for ( int j = 0 ; j < list.size(); j++) {
|
069 |
gk = (Gk_infoSub) list.get(j);
|
070 |
Document doc = new Document();
|
071 |
doc.add( new Field( "indexno" , StringUtil.null2String(gk.getIndexno()), Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
|
072 |
doc.add( new Field( "title" , StringUtil.null2String(gk.getTitle()), Field.Store.YES, Field.Index.ANALYZED));
|
073 |
doc.add( new Field( "describes" , StringUtil.null2String(gk.getDescribes()), Field.Store.YES, Field.Index.ANALYZED));
|
074 |
doc.add( new Field( "pdate" , StringUtil.null2String(gk.getPdate()), Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
|
075 |
doc.add( new Field( "keywords" , StringUtil.null2String(gk.getKeywords()), Field.Store.YES, Field.Index.ANALYZED));
|
076 |
writer.addDocument(doc);
|
077 |
ObjectCtl.executeUpdateBySql(con, "UPDATE TABLEA SET SSTAG=1 WHERE indexno='" +gk.getIndexno()+ "'" );
|
080 |
long b = System.currentTimeMillis();
|
082 |
System.out.println( "[Lucene " + rowCount + "条," + pages + "页,第" + i + "页花费时间:" + c + "毫秒]" );
|
086 |
} catch (Exception e) {
|
089 |
DBConnector.freecon(con);
|
091 |
if (writer != null ) {
|
094 |
} catch (CorruptIndexException e) {
|
096 |
} catch (IOException e) {
|
100 |
if (dir != null && IndexWriter.isLocked(dir)) {
|
101 |
IndexWriter.unlock(dir);
|
103 |
} catch (IOException e) {
|
108 |
long b1 = System.currentTimeMillis();
|
110 |
System.out.println( "[Lucene 执行完毕,花费时间:" + c1 + "毫秒,完成时间:" + new Date() + "]" );
|
3、单字段查询以及多字段分页查询高亮显示
003 |
import org.apache.lucene.store.FSDirectory;
|
004 |
import org.apache.lucene.store.Directory;
|
005 |
import org.apache.lucene.search.*;
|
006 |
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
|
007 |
import org.apache.lucene.search.highlight.Highlighter;
|
008 |
import org.apache.lucene.search.highlight.SimpleFragmenter;
|
009 |
import org.apache.lucene.search.highlight.QueryScorer;
|
010 |
import org.apache.lucene.queryParser.QueryParser;
|
011 |
import org.apache.lucene.queryParser.MultiFieldQueryParser;
|
012 |
import org.apache.lucene.analysis.TokenStream;
|
013 |
import org.apache.lucene.analysis.Analyzer;
|
014 |
import org.apache.lucene.analysis.KeywordAnalyzer;
|
015 |
import org.apache.lucene.document.Document;
|
016 |
import org.apache.lucene.index.IndexReader;
|
017 |
import org.apache.lucene.index.Term;
|
018 |
import org.apache.lucene.util.Version;
|
019 |
import modules.gk.Gk_infoSub;
|
021 |
import java.util.ArrayList;
|
023 |
import java.io.StringReader;
|
024 |
import java.lang.reflect.Constructor;
|
026 |
import web.util.StringUtil;
|
027 |
import web.sys.Globals;
|
028 |
import org.wltea.analyzer.lucene.IKAnalyzer;
|
030 |
public class LuceneQuery {
|
031 |
private static String indexPath;
|
032 |
private int rowCount;
|
034 |
private int currentPage;
|
035 |
private int pageSize;
|
037 |
public LuceneQuery() {
|
038 |
this .indexPath = Globals.SYS_COM_CONFIG.get( "sys.index.path" ).toString();
|
041 |
public int getRowCount() {
|
045 |
public int getPages() {
|
049 |
public int getPageSize() {
|
053 |
public int getCurrentPage() {
|
060 |
public ArrayList queryIndexTitle(String keyWord, int curpage, int pageSize) {
|
061 |
ArrayList list = new ArrayList();
|
069 |
this .pageSize = pageSize;
|
070 |
this .currentPage = curpage;
|
071 |
int start = (curpage - 1 ) * pageSize;
|
072 |
Directory dir = FSDirectory.open( new File(indexPath));
|
073 |
IndexReader reader = IndexReader.open(dir);
|
074 |
IndexSearcher searcher = new IndexSearcher(reader);
|
075 |
Analyzer analyzer = new IKAnalyzer( true );
|
076 |
QueryParser queryParser = new QueryParser(Version.LUCENE_36, "title" , analyzer);
|
077 |
queryParser.setDefaultOperator(QueryParser.AND_OPERATOR);
|
078 |
Query query = queryParser.parse(keyWord);
|
079 |
int hm = start + pageSize;
|
080 |
TopScoreDocCollector res = TopScoreDocCollector.create(hm, false );
|
081 |
searcher.search(query, res);
|
083 |
SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter( "<span style='color:red'>" , "</span>" );
|
084 |
Highlighter highlighter = new Highlighter(simpleHTMLFormatter, new QueryScorer(query));
|
085 |
this .rowCount = res.getTotalHits();
|
086 |
this .pages = (rowCount - 1 ) / pageSize + 1 ;
|
087 |
TopDocs tds = res.topDocs(start, pageSize);
|
088 |
ScoreDoc[] sd = tds.scoreDocs;
|
089 |
for ( int i = 0 ; i < sd.length; i++) {
|
090 |
Document hitDoc = reader.document(sd[i].doc);
|
091 |
list.add(createObj(hitDoc, analyzer, highlighter));
|
094 |
} catch (Exception e) {
|
104 |
public ArrayList queryIndexFields(String allkeyword, String onekeyword, String nokeyword, int curpage, int pageSize) {
|
105 |
ArrayList list = new ArrayList();
|
113 |
this .pageSize = pageSize;
|
114 |
this .currentPage = curpage;
|
115 |
int start = (curpage - 1 ) * pageSize;
|
116 |
Directory dir = FSDirectory.open( new File(indexPath));
|
117 |
IndexReader reader = IndexReader.open(dir);
|
118 |
IndexSearcher searcher = new IndexSearcher(reader);
|
119 |
BooleanQuery bQuery = new BooleanQuery();
|
120 |
if (! "" .equals(allkeyword)) {
|
121 |
KeywordAnalyzer analyzer = new KeywordAnalyzer();
|
122 |
BooleanClause.Occur[] flags = {BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD};
|
123 |
Query query = MultiFieldQueryParser.parse(Version.LUCENE_36, allkeyword, new String[]{ "title" , "describes" , "keywords" }, flags, analyzer);
|
124 |
bQuery.add(query, BooleanClause.Occur.MUST);
|
126 |
if (! "" .equals(onekeyword)) {
|
127 |
Analyzer analyzer = new IKAnalyzer( true );
|
128 |
BooleanClause.Occur[] flags = {BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD};
|
129 |
Query query = MultiFieldQueryParser.parse(Version.LUCENE_36, onekeyword, new String[]{ "title" , "describes" , "keywords" }, flags, analyzer);
|
130 |
bQuery.add(query, BooleanClause.Occur.MUST);
|
132 |
if (! "" .equals(nokeyword)) {
|
133 |
Analyzer analyzer = new IKAnalyzer( true );
|
134 |
BooleanClause.Occur[] flags = {BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD};
|
135 |
Query query = MultiFieldQueryParser.parse(Version.LUCENE_36, nokeyword, new String[]{ "title" , "describes" , "keywords" }, flags, analyzer);
|
136 |
bQuery.add(query, BooleanClause.Occur.MUST_NOT);
|
139 |
int hm = start + pageSize;
|
140 |
TopScoreDocCollector res = TopScoreDocCollector.create(hm, false );
|
141 |
searcher.search(bQuery, res);
|
142 |
SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter( "<span style='color:red'>" , "</span>" );
|
143 |
Highlighter highlighter = new Highlighter(simpleHTMLFormatter, new QueryScorer(bQuery));
|
144 |
this .rowCount = res.getTotalHits();
|
145 |
this .pages = (rowCount - 1 ) / pageSize + 1 ;
|
146 |
System.out.println( "rowCount:" + rowCount);
|
147 |
TopDocs tds = res.topDocs(start, pageSize);
|
148 |
ScoreDoc[] sd = tds.scoreDocs;
|
149 |
Analyzer analyzer = new IKAnalyzer();
|
150 |
for ( int i = 0 ; i < sd.length; i++) {
|
151 |
Document hitDoc = reader.document(sd[i].doc);
|
152 |
list.add(createObj(hitDoc, analyzer, highlighter));
|
155 |
} catch (Exception e) {
|
167 |
private synchronized static Object createObj(Document doc, Analyzer analyzer, Highlighter highlighter) {
|
169 |
Gk_infoSub gk = new Gk_infoSub();
|
173 |
gk.setIndexno(StringUtil.null2String(doc.get( "indexno" )));
|
174 |
gk.setPdate(StringUtil.null2String(doc.get( "pdate" )));
|
175 |
String title = StringUtil.null2String(doc.get( "title" ));
|
177 |
if (! "" .equals(title)) {
|
178 |
highlighter.setTextFragmenter( new SimpleFragmenter(title.length()));
|
179 |
TokenStream tk = analyzer.tokenStream( "title" , new StringReader(title));
|
180 |
String htext = StringUtil.null2String(highlighter.getBestFragment(tk, title));
|
181 |
if (! "" .equals(htext)) {
|
185 |
String keywords = StringUtil.null2String(doc.get( "keywords" ));
|
186 |
gk.setKeywords(keywords);
|
187 |
if (! "" .equals(keywords)) {
|
188 |
highlighter.setTextFragmenter( new SimpleFragmenter(keywords.length()));
|
189 |
TokenStream tk = analyzer.tokenStream( "keywords" , new StringReader(keywords));
|
190 |
String htext = StringUtil.null2String(highlighter.getBestFragment(tk, keywords));
|
191 |
if (! "" .equals(htext)) {
|
192 |
gk.setKeywords(htext);
|
195 |
String describes = StringUtil.null2String(doc.get( "describes" ));
|
196 |
gk.setDescribes(describes);
|
197 |
if (! "" .equals(describes)) {
|
198 |
highlighter.setTextFragmenter( new SimpleFragmenter(describes.length()));
|
199 |
TokenStream tk = analyzer.tokenStream( "keywords" , new StringReader(describes));
|
200 |
String htext = StringUtil.null2String(highlighter.getBestFragment(tk, describes));
|
201 |
if (! "" .equals(htext)) {
|
202 |
gk.setDescribes(htext);
|
209 |
catch (Exception e) {
|
220 |
private synchronized static Object createObj(Document doc) {
|
222 |
Gk_infoSub gk = new Gk_infoSub();
|
226 |
gk.setIndexno(StringUtil.null2String(doc.get( "indexno" )));
|
227 |
gk.setPdate(StringUtil.null2String(doc.get( "pdate" )));
|
228 |
gk.setTitle(StringUtil.null2String(doc.get( "title" )));
|
229 |
gk.setKeywords(StringUtil.null2String(doc.get( "keywords" )));
|
230 |
gk.setDescribes(StringUtil.null2String(doc.get( "describes" )));
|
234 |
catch (Exception e) {
|
单字段查询:
01 |
long a = System.currentTimeMillis();
|
03 |
int curpage = StringUtil.StringToInt(StringUtil.null2String(form.get( "curpage" )));
|
04 |
int pagesize = StringUtil.StringToInt(StringUtil.null2String(form.get( "pagesize" )));
|
05 |
String title = StringUtil.replaceLuceneStr(StringUtil.null2String(form.get( "title" )));
|
06 |
LuceneQuery lu = new LuceneQuery();
|
07 |
form.addResult( "list" , lu.queryIndexTitle(title, curpage, pagesize));
|
08 |
form.addResult( "curPage" , lu.getCurrentPage());
|
09 |
form.addResult( "pageSize" , lu.getPageSize());
|
10 |
form.addResult( "rowCount" , lu.getRowCount());
|
11 |
form.addResult( "pageCount" , lu.getPages());
|
12 |
} catch (Exception e) {
|
15 |
long b = System.currentTimeMillis();
|
17 |
System.out.println( "[搜索信息花费时间:" + c + "毫秒]" );
|
多字段查询:
01 |
long a = System.currentTimeMillis();
|
03 |
int curpage = StringUtil.StringToInt(StringUtil.null2String(form.get( "curpage" )));
|
04 |
int pagesize = StringUtil.StringToInt(StringUtil.null2String(form.get( "pagesize" )));
|
05 |
String allkeyword = StringUtil.replaceLuceneStr(StringUtil.null2String(form.get( "allkeyword" )));
|
06 |
String onekeyword = StringUtil.replaceLuceneStr(StringUtil.null2String(form.get( "onekeyword" )));
|
07 |
String nokeyword = StringUtil.replaceLuceneStr(StringUtil.null2String(form.get( "nokeyword" )));
|
08 |
LuceneQuery lu = new LuceneQuery();
|
09 |
form.addResult( "list" , lu.queryIndexFields(allkeyword,onekeyword,nokeyword, curpage, pagesize));
|
10 |
form.addResult( "curPage" , lu.getCurrentPage());
|
11 |
form.addResult( "pageSize" , lu.getPageSize());
|
12 |
form.addResult( "rowCount" , lu.getRowCount());
|
13 |
form.addResult( "pageCount" , lu.getPages());
|
14 |
} catch (Exception e) {
|
17 |
long b = System.currentTimeMillis();
|
19 |
System.out.println( "[高级检索花费时间:" + c + "毫秒]" );
|
4、Lucene通配符查询
1 |
BooleanQuery bQuery = new BooleanQuery();
|
2 |
if (! "" .equals(title)) {
|
3 |
WildcardQuery w1 = new WildcardQuery( new Term( "title" , title+ "*" ));
|
5 |
bQuery.add(w1, BooleanClause.Occur.MUST);
|
7 |
int hm = start + pageSize;
|
8 |
TopScoreDocCollector res = TopScoreDocCollector.create(hm, false );
|
9 |
searcher.search(bQuery, res); |
5、Lucene嵌套查询
实现SQL:(unitid like 'unitid%' and idml like 'id2%') or (tounitid like 'unitid%' and tomlid like 'id2%' and tostate=1)
01 |
BooleanQuery bQuery = new BooleanQuery();
|
02 |
BooleanQuery b1 = new BooleanQuery();
|
03 |
WildcardQuery w1 = new WildcardQuery( new Term( "unitid" , unitid + "*" ));
|
04 |
WildcardQuery w2 = new WildcardQuery( new Term( "idml" , id2 + "*" ));
|
05 |
b1.add(w1, BooleanClause.Occur.MUST);
|
06 |
b1.add(w2, BooleanClause.Occur.MUST);
|
07 |
bQuery.add(b1, BooleanClause.Occur.SHOULD);
|
08 |
BooleanQuery b2 = new BooleanQuery();
|
09 |
WildcardQuery w3 = new WildcardQuery( new Term( "tounitid" , unitid + "*" ));
|
10 |
WildcardQuery w4 = new WildcardQuery( new Term( "tomlid" , id2 + "*" ));
|
11 |
WildcardQuery w5 = new WildcardQuery( new Term( "tostate" , "1" ));
|
12 |
b2.add(w3, BooleanClause.Occur.MUST);
|
13 |
b2.add(w4, BooleanClause.Occur.MUST);
|
14 |
b2.add(w5, BooleanClause.Occur.MUST);
|
15 |
bQuery.add(b2, BooleanClause.Occur.SHOULD);
|
6、Lucene先根据时间排序后分页
01 |
int hm = start + pageSize;
|
02 |
Sort sort = new Sort( new SortField( "pdate" , SortField.STRING, true ));
|
03 |
TopScoreDocCollector res = TopScoreDocCollector.create(pageSize, false );
|
04 |
searcher.search(bQuery, res); |
05 |
this .rowCount = res.getTotalHits();
|
06 |
this .pages = (rowCount - 1 ) / pageSize + 1 ;
|
07 |
TopDocs tds =searcher.search(bQuery,rowCount,sort);
|
08 |
ScoreDoc[] sd = tds.scoreDocs; |
09 |
System.out.println( "rowCount:" + rowCount);
|
11 |
for (ScoreDoc scoreDoc : sd) {
|
19 |
Document doc = searcher.doc(scoreDoc.doc);
|
20 |
list.add(createObj(doc));
|
这个效率不高,正常的做法是在创建索引的时候进行排序,之后使用分页方法,不要像这样进行 2 次查询。
分享到:
相关推荐
本文将深入探讨Lucene.NET如何进行中文分词以及高亮显示的实现。 ### 1. 中文分词 中文分词是将连续的汉字序列切分成具有语义的词语的过程,是自然语言处理(NLP)中的基础步骤。在Lucene.NET中,为了支持中文分词...
总的来说,“lucene3.6.jar”与IkAnalyzer的结合,为开发者提供了一套强大的中文全文检索解决方案,既兼顾了搜索的效率,又保证了分词的准确性。通过深入理解和熟练运用这两个工具,可以在各种Java应用中实现高效且...
1. 查询构造:Lucene支持多种查询语法,如布尔查询、短语查询、范围查询等。通过QueryParser,我们可以根据用户输入的关键词生成对应的查询对象。 2. 查询执行:使用IndexSearcher类执行查询。IndexSearcher会遍历...
总结来说,Lucene 3.6提供了一套完整的全文检索解决方案,从文本预处理、索引构建到查询执行,再到结果展示,涵盖了搜索引擎的各个环节。熟练掌握Lucene,能够帮助你构建自己的高效搜索系统,满足各种数据检索需求。...
- 可以查看索引结构、文档信息、查询等。 #### 6.2 Tika - Tika是Apache的元数据提取库,可解析多种文件格式并提取文本。 - 原理包括解析文件、提取内容、转换文本等。 #### 6.3 高亮显示 - 自定义高亮标签,突出...
IKAnalyzer是一款广受欢迎的开源中文分词器,主要用于提高中文信息检索、文本分析等领域的效率。这个修复源码是针对IKAnalyzer2012版本的一个改进,解决了原版中无法添加自定义扩展的中文停用词的问题。在信息处理中...
在网上找了实例,但是发现不能使用,只能简历索引。...lucene3.6版本,能够建立索引,能搜索。inderwriter,indexsearch. 其中包C下的helloword实例能用,其余的全是网上不能用的。直接下载 可以运行
lucene3.6 模仿百度自动补全(lucene3.6 模仿百度自动补全(lucene3.6 模仿百度自动补全(lucene3.6 模仿百度自动补全(lucene3.6 模仿百度自动补全(lucene3.6 模仿百度自动补全(lucene3.6 模仿百度自动补全
本话题聚焦于“Lucene多字段查询”和“文字高亮显示”,这两个特性在信息检索和数据挖掘中具有广泛应用。 首先,让我们深入理解“Lucene多字段查询”。在信息检索系统中,用户可能希望根据多个字段来过滤和排序结果...
标题"IKAnalyzer LUCENE.4.9 中文分词的高亮显示"表明我们将探讨如何使用IKAnalyzer与Lucene 4.9版本相结合,实现搜索结果的关键词高亮功能。高亮显示有助于用户快速识别和理解搜索结果中的重要信息。 IKAnalyzer的...
在描述中提到的IK中文分词器,即 Intelligent Chinese Analyzer for Lucene,是专门针对中文的分词工具,能够处理停用词(如“的”、“是”等常见无实际意义的词语)并支持扩展词典,提高中文搜索的准确性。...
3. 高亮显示:使用Highlighter组件,可以对查询结果的关键词进行高亮,提高用户体验。 4. 断词分析:对于中文等非英文语言,需配合合适的分词器,如ikanalyzer,进行断词处理。 5. 排序与过滤:除了默认的按评分...
总结,Lucene 3.6 是一个强大的全文检索工具,通过理解它的基本概念、主要组件以及不同类型的查询,开发者可以快速构建起自己的全文搜索引擎。对于初学者而言,这个版本提供了足够的学习资源和实践机会,是深入理解...
在本案例中,我们使用的是 IKAnalyzer,这是一个针对中文的开源分词器,能较好地处理中文分词问题。 4. **索引writer(IndexWriter)**:负责创建或更新索引。我们可以向 IndexWriter 添加文档,然后调用 commit() ...
在版本3.6中,Lucene提供了一整套强大的文本搜索功能,包括索引、查询、排序、分词等。这个版本的源代码为我们提供了深入了解搜索引擎工作原理的机会。 首先,我们要明确的是,Lucene的核心在于它的索引机制。在`...
《深入理解Lucene 3.6与IKAnalyzer 2012FF_u1:构建高效中文搜索引擎》 在信息技术领域,搜索引擎是数据检索的核心工具,而Lucene作为Apache软件基金会的开源全文搜索引擎库,因其强大的搜索功能和灵活性,被广泛...
来自“猎图网 www.richmap.cn”基于IKAnalyzer分词算法的准商业化Lucene中文分词器。 1. 正向全切分算法,42万汉字字符/每秒的处理能力(IBM ThinkPad 酷睿I 1.6G 1G内存 WinXP) 2. 对数量词、地名、路名的...
4. **查询解析器(Query Parser)**:将用户输入的查询字符串转化为Lucene能够理解的查询对象。 5. **搜索器(Searcher)**:搜索器是实际执行查询的对象,它会查找与查询匹配的文档,并返回结果。 6. **分词器...
6. **高级特性**:可能涉及更复杂的搜索技术,如布尔查询、短语查询、模糊查询、范围查询,以及如何使用Filter和Highlighter进行结果过滤和高亮显示。 7. **优化与性能**:讨论如何通过调整参数、使用复合索引、...
SSH + Lucene + 分页 + 排序 + 高亮 模拟简单新闻网站搜索引擎--NewsWithSearch.part3 SSH + Lucene + 分页 + 排序 + 高亮 模拟简单新闻网站搜索引擎--NewsWithSearch.part2 SSH + Lucene + 分页 + 排序 + 高亮 ...