Source code of the bjsxt Lucene project

Web.xml

<?xml version="1.0" encoding="UTF-8"?>
<web-app version="2.4" 
	xmlns="http://java.sun.com/xml/ns/j2ee" 
	xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" 
	xsi:schemaLocation="http://java.sun.com/xml/ns/j2ee 
	http://java.sun.com/xml/ns/j2ee/web-app_2_4.xsd">
  <welcome-file-list>
    <welcome-file>index.jsp</welcome-file>
  </welcome-file-list>
</web-app>


DataFilter.java

package com.bjsxt.lucene.util;

public class DataFilter {
	public static String getHTML(String sourcestr) {
		if (sourcestr == null) {
			return "";
		}
		// escape HTML special characters first (& must be handled before the entities below)
		sourcestr = sourcestr.replaceAll("\\x26", "&amp;");  // &
		sourcestr = sourcestr.replaceAll("\\x3c", "&lt;");   // <
		sourcestr = sourcestr.replaceAll("\\x3e", "&gt;");   // >
		sourcestr = sourcestr.replaceAll("\\x09", "&nbsp;&nbsp;&nbsp;&nbsp;"); // tab
		sourcestr = sourcestr.replaceAll("\\x20", "&nbsp;"); // space
		sourcestr = sourcestr.replaceAll("\\x22", "&quot;"); // double quote

		// convert line breaks to <br>
		sourcestr = sourcestr.replaceAll("\r\n", "<br>"); // CR+LF
		sourcestr = sourcestr.replaceAll("\r", "<br>");   // CR
		sourcestr = sourcestr.replaceAll("\n", "<br>");   // LF
		return sourcestr;
	}
}
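
A quick way to see what getHTML produces: the sketch below (hypothetical demo class and input string, assuming DataFilter is on the classpath) escapes the HTML special characters and turns line breaks into <br>, which is how a search-result summary would be rendered safely in a JSP page.

import com.bjsxt.lucene.util.DataFilter;

public class DataFilterDemo {
	public static void main(String[] args) {
		// hypothetical input containing characters that must be escaped before rendering as HTML
		String raw = "if (a < b && c > 0) {\r\n\tprint(\"ok\");\r\n}";
		String html = DataFilter.getHTML(raw);
		// < > & " become entities, tabs and spaces become &nbsp;, line breaks become <br>
		System.out.println(html);
	}
}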



IndexHTML.java


import java.io.File;
import java.io.FileReader;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;

public class IndexHTML {
	public static void main(String[] args) throws Exception {
		String root = "D:\\share\\0100_J2SE\\soft\\html_zh_CN\\html\\zh_CN\\api\\index.html";
		String index = "D:\\share\\0400_Servlet_JSP\\soft\\apache-tomcat-5.5.17\\apache-tomcat-5.5.17\\index_en";
		// main declares throws Exception, so index-creation failures simply propagate
		IndexWriter writer = new IndexWriter(index, new StandardAnalyzer(), true);

		File f = new File(root);
		Document doc = new Document();
		doc.add(new Field("path", f.getPath(), Field.Store.YES, Field.Index.NO));
		doc.add(new Field("title", "我们是共产主义接班人", Field.Store.YES, Field.Index.TOKENIZED));
		doc.add(new Field("size", "000129", Field.Store.YES, Field.Index.UN_TOKENIZED));
		// a Reader-based field is tokenized and indexed but never stored
		doc.add(new Field("content", new FileReader(f)));

		writer.addDocument(doc);
		writer.optimize();
		writer.close();
	}
}
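
Note that the content field above is built from a FileReader; in Lucene, Reader-based fields are tokenized and indexed but never stored, so only path, title and size can be read back from the index. A minimal read-back sketch under that assumption (hypothetical class name, same Lucene 2.x API and index path as above):

import org.apache.lucene.document.Document;
import org.apache.lucene.search.IndexSearcher;

public class ReadBackDemo {
	public static void main(String[] args) throws Exception {
		String index = "D:\\share\\0400_Servlet_JSP\\soft\\apache-tomcat-5.5.17\\apache-tomcat-5.5.17\\index_en";
		IndexSearcher searcher = new IndexSearcher(index);
		// IndexHTML wrote a single document, so it has the internal id 0
		Document doc = searcher.doc(0);
		System.out.println(doc.get("path"));    // stored: the original file path comes back
		System.out.println(doc.get("title"));   // stored and tokenized
		System.out.println(doc.get("content")); // null: Reader-based fields are not stored
		searcher.close();
	}
}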


IndexHTMLTidy.java

import java.io.File; 
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.text.DecimalFormat;
import jeasy.analysis.MMAnalyzer;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.dom.Text;
import org.w3c.tidy.Tidy;

public class IndexHTMLTidy {
	static String root = "D:\\share\\0100_J2SE\\soft\\html_zh_CN\\html\\zh_CN\\api\\java\\lang";
	static String index = "D:\\share\\0400_Servlet_JSP\\soft\\apache-tomcat-5.5.17\\apache-tomcat-5.5.17\\index_cn";
	
	static Document doc = null;
	static IndexWriter writer = null;

	public static void main(String[] args) throws Exception {
		writer = new IndexWriter(index, new MMAnalyzer(), true);
		File f = new File(root);
		indexDocs(f);
		writer.optimize();
		writer.close();
		System.out.println("ok...");
	}

	// recurse through directories
	public static void indexDocs(File f) throws Exception {
		if (f.isDirectory()) {
			File files[] = f.listFiles();
			for (int i = 0; i < files.length; i++) {
				indexDocs(files[i]);
			}
		} else if (f.getName().endsWith(".html")) {
			indexDoc(f);
		}
	}

	// index a single file
	public static void indexDoc(File f) throws Exception {
		doc = new Document();
		System.out.println(f.getPath());
		doc.add(new Field("path", f.getPath(), Field.Store.YES, Field.Index.NO));
		String size = new DecimalFormat("0000000000").format(f.length());
		doc.add(new Field("size", size, Field.Store.YES,
				Field.Index.UN_TOKENIZED));
		doc.add(new Field("lastmodified", DateTools.timeToString(f
				.lastModified(), DateTools.Resolution.DAY), Field.Store.YES,
				Field.Index.UN_TOKENIZED));

		Tidy tidy = new Tidy();
		tidy.setQuiet(true);
		tidy.setShowWarnings(false);
		// feeding the raw bytes to Tidy produced garbled characters:
		// org.w3c.dom.Document root = tidy.parseDOM(new FileInputStream(f), System.out);

		// fix for the garbled characters:
		// java.io.InputStream declares the abstract method read(), which reads one byte from the stream.
		// java.io.FileInputStream implements read() as defined in its parent class.
		// public class InputStreamReader extends java.io.Reader
		// public class FileInputStream extends java.io.InputStream
		// InputStreamReader.read() reads a single character,
		// so the file is decoded as GB2312 before it is handed to Tidy.
		InputStreamReader ips = new InputStreamReader(new FileInputStream(f), "gb2312");
		// adapter pattern: wrap the Reader back into an InputStream for Tidy
		InputStream is = new ReaderToInputStream(ips);

		org.w3c.dom.Document root = tidy.parseDOM(is, null);
		// get the document's root element
		Element rawDoc = root.getDocumentElement();
		// extract the <title> text
		String title = getTitle(rawDoc);
		// extract the <body> text
		String body = getBody(rawDoc);
		
		System.out.println(title);        
		
		doc.add(new Field("title", title, Field.Store.YES,Field.Index.TOKENIZED));
		
		String summary = body;
		if (body.length() >= 200) {
			summary = body.substring(0, 200);
		}
		doc.add(new Field("summary", summary, Field.Store.YES,Field.Index.TOKENIZED));
		doc.add(new Field("content", body, Field.Store.NO,Field.Index.TOKENIZED));
		writer.addDocument(doc);
	}

	// adapter: exposes a Reader through the InputStream interface
	public static class ReaderToInputStream extends InputStream {
		Reader reader;

		public ReaderToInputStream(Reader reader) {
			super();
			this.reader = reader;
		}

		@Override
		public int read() throws IOException {
			// note: Reader.read() returns a char value (0-65535) or -1, while the
			// InputStream contract expects a byte (0-255) or -1, so non-ASCII
			// characters are passed through as raw char values
			return reader.read();
		}
	}

	// get the text of the <title> tag
	protected static String getTitle(Element rawDoc) {
		if (rawDoc == null) {
			return "";
		}
		String title = "";
		NodeList children = rawDoc.getElementsByTagName("title");
		if (children.getLength() > 0) {
			Element titleElement = ((Element) children.item(0));
			Text text = (Text) titleElement.getFirstChild();
			if (text != null) {
				title = text.getData();
			}
		}
		return title;
	}

	// get the text of the <body> tag
	protected static String getBody(Element rawDoc) {
		if (rawDoc == null) {
			return "";
		}
		String body = "";
		NodeList children = rawDoc.getElementsByTagName("body");
		if (children.getLength() > 0) {
			body = getText(children.item(0));
		}
		return body;
	}

	// recurse, because tags can be nested inside other tags
	protected static String getText(Node node) {
		NodeList children = node.getChildNodes();
		StringBuffer sb = new StringBuffer();
		for (int i = 0; i < children.getLength(); i++) {
			Node child = children.item(i);
			switch (child.getNodeType()) {
			case Node.ELEMENT_NODE:
				sb.append(getText(child));
				sb.append(" ");
				break;
			case Node.TEXT_NODE:
				sb.append(((Text) child).getData());
				break;
			}
		}
		return sb.toString();
	}
}


SearchTest.java

import jeasy.analysis.MMAnalyzer;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;


public class SearchTest {
	public static void main(String[] args) throws Exception {
		String index = "D:\\share\\0400_Servlet_JSP\\soft\\apache-tomcat-5.5.17\\apache-tomcat-5.5.17\\index_cn";
		// main declares throws Exception, so open failures simply propagate
		IndexSearcher searcher = new IndexSearcher(index);
		
		Query query = new TermQuery(new Term("title","使用"));
//		QueryParser parser = new QueryParser("title",new MMAnalyzer());
//		parser.setDefaultOperator(QueryParser.AND_OPERATOR);
//		Query query = parser.parse("使用 软件包");
		
		
		Hits hits = searcher.search(query);
		System.out.println(hits.length());
		for (int i = 0; i < hits.length(); i++) {
			Document doc = hits.doc(i);
			String title = doc.get("title");
			System.out.println(title);
		}
		searcher.close();
	}
}


TestBooleanQuery.java


import org.apache.lucene.document.Document;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;

 
public class TestBooleanQuery {
	public static void main(String[] args) throws Exception{
		String index = "D:\\share\\tools\\apache-tomcat-6.0.14\\apache-tomcat-6.0.14\\index_cn";
		IndexSearcher searcher = new IndexSearcher(index);
		Hits hits = null;
		TermQuery term1 = new TermQuery(new Term("title","使用"));
		TermQuery term2 = new TermQuery(new Term("title","接口"));
		BooleanQuery query = new BooleanQuery();
		query.add(term1, BooleanClause.Occur.MUST);
		query.add(term2,BooleanClause.Occur.MUST_NOT);
		hits = searcher.search(query);
		
		System.out.println("total:"+hits.length());

		for (int i = 0; i < hits.length(); i++) {
			Document doc = hits.doc(i);
			String title = doc.get("title");
			String size = doc.get("size");
			System.out.println(title+"..."+size);
			if (i>20){
				System.out.println("...");
				break;
			}
		}
		searcher.close();	
	}
}



TestFuzzyQuery.java


import org.apache.lucene.document.Document;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
 
public class TestFuzzyQuery {
	public static void main(String[] args) throws Exception{
		String index ="D:\\share\\TOOLS\\apache-tomcat-5.5.17\\apache-tomcat-5.5.17\\webapps\\index"; 
		IndexSearcher searcher = new IndexSearcher(index);
		Hits hits = null;
		
		Query query = new FuzzyQuery(new Term("title", "filee"));
		hits = searcher.search(query);		
		System.out.println("total:"+hits.length());

		for (int i = 0; i < hits.length(); i++) {
			Document doc = hits.doc(i);
			String title = doc.get("title");
			String size = doc.get("size");
			System.out.println(title+"..."+size);
			if (i>20){
				System.out.println("...");
				break;
			}
		}
		searcher.close();	
	}
}


TestPhraseQuery.java


import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.store.RAMDirectory;

 
public class TestPhraseQuery {
	public static void main(String[] args) throws Exception{
		String index ="D:\\share\\TOOLS\\apache-tomcat-5.5.17\\apache-tomcat-5.5.17\\webapps\\index"; 
		IndexSearcher searcher = new IndexSearcher(index);
		Hits hits = null;
		
		String s = "the quick brown fox jumped over the lazy dog.";
		RAMDirectory directory = new RAMDirectory();
		IndexWriter writer = new IndexWriter(directory,new StandardAnalyzer(),true);
		Document doc = new Document();
		doc.add(new Field("field",s,Field.Store.YES,Field.Index.TOKENIZED));
		writer.addDocument(doc);
		writer.close();
		
		searcher = new IndexSearcher(directory);
		PhraseQuery query = new PhraseQuery();
		query.setSlop(3);
		query.add(new Term("field", "fox"));
		query.add(new Term("field", "quick"));
		hits = searcher.search(query);
		
		System.out.println("total:"+hits.length());

		for (int i = 0; i < hits.length(); i++) {
			Document doc1 = hits.doc(i);
			String title = doc1.get("title");
			String size = doc1.get("size");
			System.out.println(title+"..."+size);
			if (i>20){
				System.out.println("...");
				break;
			}
		}
		searcher.close();	
	}
}


TestPrefixQuery.java

import org.apache.lucene.document.Document;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.PrefixQuery;
 
public class TestPrefixQuery {
	public static void main(String[] args) throws Exception{
		String index ="D:\\share\\TOOLS\\apache-tomcat-5.5.17\\apache-tomcat-5.5.17\\webapps\\index"; 
		IndexSearcher searcher = new IndexSearcher(index);
		Hits hits = null;
		
		PrefixQuery query = new PrefixQuery(new Term("title","reg"));
		hits = searcher.search(query);
		
		System.out.println("total:"+hits.length());

		for (int i = 0; i < hits.length(); i++) {
			Document doc = hits.doc(i);
			String title = doc.get("title");
			String size = doc.get("size");
			System.out.println(title+"..."+size);
			if (i>20){
				System.out.println("...");
				break;
			}
		}
		searcher.close();	
	}
}


TestQueryParser.java

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.WildcardQuery;

 
public class TestQueryParser {
	public static void main(String[] args) throws Exception{
		String index ="D:\\share\\TOOLS\\apache-tomcat-5.5.17\\apache-tomcat-5.5.17\\webapps\\index"; 
		IndexSearcher searcher = new IndexSearcher(index);
		Hits hits = null;
		
		QueryParser parser = new QueryParser("title",new StandardAnalyzer());
		Query query = parser.parse("lucene+java");
		System.out.println(query.toString());
		hits = searcher.search(query);

		for (int i = 0; i < hits.length(); i++) {
			Document doc = hits.doc(i);
			String title = doc.get("title");
			String size = doc.get("size");
			System.out.println(title+"..."+size);
			if (i>20){
				System.out.println("...");
				break;
			}
		}
		searcher.close();	
	}
}


TestRangeQuery.java

import org.apache.lucene.document.Document;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.RangeQuery;
import org.apache.lucene.search.TermQuery;

 
public class TestRangeQuery {

	/**
	 * @param args
	 */
	public static void main(String[] args) throws Exception{
		String index = "C:\\tomcat\\webapps\\index";
		IndexSearcher searcher = new IndexSearcher(index);
		Hits hits = null;
		
		Term begin = new Term("size","0000000001");
		Term end = new Term("size","0000001000");
		RangeQuery query = new RangeQuery(begin,end,true);
		System.out.println(query.toString());
		hits = searcher.search(query);
		
		System.out.println("total:"+hits.length());

		for (int i = 0; i < hits.length(); i++) {
			Document doc = hits.doc(i);
			String title = doc.get("title");
			String size = doc.get("size");
			System.out.println(title+"..."+size);
			if (i>20){
				System.out.println("...");
				break;
			}
		}
		searcher.close();	
	}

}
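
RangeQuery compares its boundary terms as strings, which is why IndexHTMLTidy writes the size field through DecimalFormat("0000000000"): zero-padding makes lexicographic order agree with numeric order. A small plain-Java illustration (hypothetical demo class, no Lucene required):

public class PaddingDemo {
	public static void main(String[] args) {
		// unpadded: "9" sorts after "1000", which breaks numeric range queries
		System.out.println("9".compareTo("1000") > 0);                 // true
		// zero-padded to ten digits: string order matches numeric order
		System.out.println("0000000009".compareTo("0000001000") > 0); // false
	}
}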



TestSpanTermQuery.java


import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.spans.SpanFirstQuery;
import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanNotQuery;
import org.apache.lucene.search.spans.SpanOrQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.store.RAMDirectory;

public class TestSpanTermQuery {
	
	static String index = "C:\\tomcat\\webapps\\index";
	static IndexSearcher searcher = null;
	static Hits hits = null;
	

	/**
	 * Span queries match terms within a given positional span of a document.
	 * There are five subclasses of the base SpanQuery:
	 * SpanTermQuery, SpanFirstQuery, SpanNearQuery,
	 * SpanNotQuery and SpanOrQuery.
	 */
	public static void testSpanTermQuery()throws Exception{
		String s = "Man always remember love because of romance only";
		RAMDirectory directory = new RAMDirectory();
		IndexWriter writer = new IndexWriter(directory,new StandardAnalyzer(),true);
		Document doc = new Document();
		doc.add(new Field("title",s,Field.Store.YES,Field.Index.TOKENIZED));
		writer.addDocument(doc);
		writer.close();

		searcher = new IndexSearcher(directory);
		
		// SpanTermQuery matches exactly the same documents as TermQuery,
		// but it also records position information that the other SpanQuery classes build on.
		Term t = new Term("title","remember");
		SpanTermQuery query = new SpanTermQuery(t);

		hits = searcher.search(query);
	}
	// With an end position of 3 the document is found;
	// with an end position of 2 it is not ("remember" is the third token).
	public static void testSpanFirstQuery()throws Exception{
		String s = "Man always remember love because of romance only";
		RAMDirectory directory = new RAMDirectory();
		IndexWriter writer = new IndexWriter(directory,new StandardAnalyzer(),true);
		Document doc = new Document();
		doc.add(new Field("title",s,Field.Store.YES,Field.Index.TOKENIZED));
		writer.addDocument(doc);
		writer.close();

		searcher = new IndexSearcher(directory);
		SpanTermQuery brown = new SpanTermQuery(new Term("title", "remember"));
		SpanFirstQuery query = new SpanFirstQuery(brown, 3);
		hits = searcher.search(query);
	}
	// The second SpanNearQuery constructor argument plays the same role as the slop in
	// PhraseQuery: the number of unrelated terms allowed between the two terms.
	// The third argument says whether the terms must appear in the document in the given order.
	// With a slop of 3 the document below is found; changing it to 2 makes the match fail,
	// because three terms (always, remember, love) sit between "man" and "because".
	public static void testSpanNearQuery() throws Exception {
		String s = "Man always remember love because of romance only";
		RAMDirectory directory = new RAMDirectory();
		IndexWriter writer = new IndexWriter(directory, new StandardAnalyzer(), true);
		Document doc = new Document();
		doc.add(new Field("title", s, Field.Store.YES, Field.Index.TOKENIZED));
		writer.addDocument(doc);
		writer.close();

		searcher = new IndexSearcher(directory);
		// StandardAnalyzer lower-cases tokens, so the terms must be lower case
		Term t1 = new Term("title", "man");
		Term t2 = new Term("title", "because");
		SpanTermQuery q1 = new SpanTermQuery(t1);
		SpanTermQuery q2 = new SpanTermQuery(t2);
		SpanNearQuery query = new SpanNearQuery(new SpanQuery[] {q1, q2}, 3, false);

		hits = searcher.search(query);
	}
	public static void testSpanNearQueryNested()throws Exception{
		String s = "aa bb cc dd ee ff gg hh ii jj kk";
		RAMDirectory directory = new RAMDirectory();
		IndexWriter writer = new IndexWriter(directory,new StandardAnalyzer(),true);
		Document doc = new Document();
		doc.add(new Field("title",s,Field.Store.YES,Field.Index.TOKENIZED));
		writer.addDocument(doc);
		writer.close();

		searcher = new IndexSearcher(directory);
		Term t1 = new Term("title","aa");
		Term t2 = new Term("title","cc");
		SpanTermQuery s1 = new SpanTermQuery(t1);
		SpanTermQuery s2 = new SpanTermQuery(t2);
		Term t3 = new Term("title","gg");
		Term t4 = new Term("title","kk");
		SpanTermQuery s3 = new SpanTermQuery(t3);
		SpanTermQuery s4 = new SpanTermQuery(t4);
		SpanNearQuery query1 = new SpanNearQuery(new SpanQuery [] {s1,s2},1,false);
		SpanNearQuery query2 = new SpanNearQuery(new SpanQuery [] {s3,s4},3,false);
		SpanNearQuery query = new SpanNearQuery(new SpanQuery [] {query1,query2},3,false);

		hits = searcher.search(query);
	}
	
	// SpanOrQuery combines the results of all of its SpanQuery clauses into one result set.
	public static void testSpanOrQuery()throws Exception{
		String s = "aa bb cc dd ee ff gg hh ii jj kk";
		RAMDirectory directory = new RAMDirectory();
		IndexWriter writer = new IndexWriter(directory,new StandardAnalyzer(),true);
		Document doc = new Document();
		doc.add(new Field("title",s,Field.Store.YES,Field.Index.TOKENIZED));
		writer.addDocument(doc);
		writer.close();

		searcher = new IndexSearcher(directory);
		Term t1 = new Term("title","aa");
		Term t2 = new Term("title","cc");
		SpanTermQuery s1 = new SpanTermQuery(t1);
		SpanTermQuery s2 = new SpanTermQuery(t2);
		Term t3 = new Term("title","ff");
		Term t4 = new Term("title","jj");
		SpanTermQuery s3 = new SpanTermQuery(t3);
		SpanTermQuery s4 = new SpanTermQuery(t4);
		SpanNearQuery query1 = new SpanNearQuery(new SpanQuery [] {s1,s2},1,false);
		SpanNearQuery query2 = new SpanNearQuery(new SpanQuery [] {s3,s4},3,false);
		SpanOrQuery query = new SpanOrQuery(new SpanQuery [] {query1,query2});

		hits = searcher.search(query);
	}
	// SpanNotQuery takes two SpanQuery arguments and means:
	// from the results of the first SpanQuery, remove the results of the second SpanQuery.
	// Note: SpanNotQuery also excludes documents in which the two spans overlap.
	public static void testSpanNotQuery()throws Exception{
		String s = "aa bb cc dd ee ff gg hh ii jj kk";
		String ss = "gg aa bb cc dd ee ff hh ii ii jj kk";
		RAMDirectory directory = new RAMDirectory();
		IndexWriter writer = new IndexWriter(directory,new StandardAnalyzer(),true);
		
		Document doc = new Document();
		doc.add(new Field("title",s,Field.Store.YES,Field.Index.TOKENIZED));
		writer.addDocument(doc);
		
		Document doc1 = new Document();
		doc1.add(new Field("title",ss,Field.Store.YES,Field.Index.TOKENIZED));
		writer.addDocument(doc1);
		
		writer.optimize();
		writer.close();

		searcher = new IndexSearcher(directory);
		
		Term t1 = new Term("title","gg");
		SpanTermQuery s1 = new SpanTermQuery(t1);
		SpanFirstQuery query1 = new SpanFirstQuery(s1,9);
		Term t3 = new Term("title","ff");
		Term t4 = new Term("title","jj");
		SpanTermQuery s3 = new SpanTermQuery(t3);
		SpanTermQuery s4 = new SpanTermQuery(t4);
		SpanNearQuery query2 = new SpanNearQuery(new SpanQuery [] {s3,s4},3,false);
		SpanNotQuery query = new SpanNotQuery(query1,query2);
		
		System.out.println(query.toString());
		hits = searcher.search(query);
		//System.out.println(query1.toString());
		//hits = searcher.search(query1);
		//System.out.println(query2.toString());
		//hits = searcher.search(query2);
	}
	


	public static void main(String[] args) throws Exception {
		// enable exactly one of the tests; each builds its own RAMDirectory index
		// and assigns the static searcher and hits fields
		testSpanTermQuery();
		//testSpanFirstQuery();
		//testSpanNearQuery();
		//testSpanNearQueryNested();
		//testSpanOrQuery();
		//testSpanNotQuery();
		System.out.println("total:" + hits.length());

		for (int i = 0; i < hits.length(); i++) {
			Document doc = hits.doc(i);
			String title = doc.get("title");
			String size = doc.get("size");
			System.out.println(title+"..."+size);
			if (i>20){
				System.out.println("...");
				break;
			}
		}
		searcher.close();
	}
}


TestTermQuery.java


import org.apache.lucene.document.Document;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.WildcardQuery;
 

public class TestTermQuery {
	public static void main(String[] args) throws Exception{
		String index ="D:\\share\\TOOLS\\apache-tomcat-5.5.17\\apache-tomcat-5.5.17\\webapps\\index"; 
		IndexSearcher searcher = new IndexSearcher(index);
		Hits hits = null;
		
		TermQuery query = new TermQuery(new Term("title","lucene"));
		System.out.println(query.toString());
		hits = searcher.search(query);

		for (int i = 0; i < hits.length(); i++) {
			Document doc = hits.doc(i);
			String title = doc.get("title");
			String size = doc.get("size");
			System.out.println(title+"..."+size);
			if (i>20){
				System.out.println("...");
				break;
			}
		}
		searcher.close();	
	}
}



TestWildcardQuery.java


import org.apache.lucene.document.Document;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.WildcardQuery;
 
/**
 * Wildcard search: * matches zero or more characters, ? matches exactly one character.
 */
public class TestWildcardQuery {
	public static void main(String[] args) throws Exception{
		String index ="D:\\share\\TOOLS\\apache-tomcat-5.5.17\\apache-tomcat-5.5.17\\webapps\\index"; 
		IndexSearcher searcher = new IndexSearcher(index);
		Hits hits = null;
		
		Query query = new WildcardQuery(new Term("title", "?ucli*"));
		System.out.println(query.toString());
		hits = searcher.search(query);

		for (int i = 0; i < hits.length(); i++) {
			Document doc = hits.doc(i);
			String title = doc.get("title");
			String size = doc.get("size");
			System.out.println(title+"..."+size);
			if (i>20){
				System.out.println("...");
				break;
			}
		}
		searcher.close();	
	}
}


TtestMultiPhraseQuery.java


import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.store.RAMDirectory;
 

public class TtestMultiPhraseQuery {
	public static void main(String[] args) throws Exception{
		String index ="D:\\share\\TOOLS\\apache-tomcat-5.5.17\\apache-tomcat-5.5.17\\webapps\\index"; 
		IndexSearcher searcher = new IndexSearcher(index);
		Hits hits = null;
		
		RAMDirectory directory = new RAMDirectory();
		IndexWriter writer = new IndexWriter(directory,new StandardAnalyzer(),true);

		Document doc1 = new Document();
		Document doc2 = new Document();
		Document doc3 = new Document();
		Document doc4 = new Document();
		Document doc5 = new Document();
		Document doc6 = new Document();
		
		String s1 = "钢铁是怎样炼成的";
		String s2 = "钢铁战士";
		String s3 = "钢和铁是两种金属元素";
		String s4 = "钢要比铁有更多的碳元素";
		String s5 = "铁和钢是两种重要的金属";
		String s6 = "铁钢是两种重要的金属";
		
		Field f1 = new Field("title",s1,Field.Store.YES,Field.Index.TOKENIZED);
		Field f2 = new Field("title",s2,Field.Store.YES,Field.Index.TOKENIZED);
		Field f3 = new Field("title",s3,Field.Store.YES,Field.Index.TOKENIZED);
		Field f4 = new Field("title",s4,Field.Store.YES,Field.Index.TOKENIZED);
		Field f5 = new Field("title",s5,Field.Store.YES,Field.Index.TOKENIZED);
		Field f6 = new Field("title",s6,Field.Store.YES,Field.Index.TOKENIZED);
		
		doc1.add(f1);
		doc2.add(f2);
		doc3.add(f3);
		doc4.add(f4);
		doc5.add(f5);
		doc6.add(f6);
		
		writer.addDocument(doc1);
		writer.addDocument(doc2);
		writer.addDocument(doc3);
		writer.addDocument(doc4);
		writer.addDocument(doc5);
		writer.addDocument(doc6);
		
		writer.close();
		
		searcher = new IndexSearcher(directory);
		MultiPhraseQuery query = new MultiPhraseQuery();

		//title:"钢 (铁 和 要)"
		query.add(new Term("title","钢"));
		Term t1 = new Term("title","铁");
		Term t2 = new Term("title","和");
		Term t3 = new Term("title","要");
		query.add(new Term [] {t1,t2,t3});

		
		//title:"(铁 钢) 和"		
/*		Term t1 = new Term("title","铁");
		Term t2 = new Term("title","钢");
		query.add(new Term [] {t1,t2});
		query.add(new Term("title","和"));
*/		
		

		//title:"(钢 和) 铁 (是 战)"
/*		Term t1 = new Term("title","钢");
		Term t2 = new Term("title","和");
		query.add(new Term [] {t1,t2});

		query.add(new Term("title","铁"));

		Term t3 = new Term("title","是");
		Term t4 = new Term("title","战");
		query.add(new Term [] {t3,t4});
*/
		
	
		System.out.println(query.toString());
		hits = searcher.search(query);
		
		System.out.println("total:"+hits.length());

		for (int i = 0; i < hits.length(); i++) {
			Document docA = hits.doc(i);
			String title = docA.get("title");
			String size = docA.get("size");
			System.out.println(title+"..."+size);
			if (i>20){
				System.out.println("...");
				break;
			}
		}
		searcher.close();	
	}
}