论坛首页 Java企业应用论坛

关于 lucene 内存溢出

浏览 5325 次
精华帖 (0) :: 良好帖 (0) :: 新手帖 (0) :: 隐藏帖 (0)
作者 正文
   发表时间:2007-08-14  



package business.ywbdmanagement;

import hibernate.YhbdProblem;
import common.AppContext;
import java.io.File;
import java.io.IOException;
import java.util.List;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import common.Constant;
import jeasy.analysis.MMAnalyzer;

/**
 * Builds the Lucene full-text index for {@link YhbdProblem} records.
 *
 * <p>Records are fetched page by page from the problem service, converted into
 * Lucene documents and appended to the index stored under
 * {@code Constant.ROOTPATH + "ywbdmanagement/index/problenindex"}.
 *
 * @author xuhaimin
 */
public class BuidYwbdIndex {

	/** Number of records requested from the service per page. */
	private static final int PAGE_SIZE = 100;

	/**
	 * Appends every record returned by the problem service to the index.
	 *
	 * <p>A single {@link IndexWriter} is kept open for the whole run. It is
	 * optimized once after the last page and always closed in a
	 * {@code finally} block, so the writer is released even when a page is
	 * empty or indexing fails. Closing it after each page (as an earlier
	 * revision did) makes the next page's {@code addDocument} call fail.
	 *
	 * <p>I/O failures are reported on standard output and not rethrown.
	 */
	public void buildIndex() {
		File indexDir = new File(Constant.ROOTPATH
				+ "ywbdmanagement/index/problenindex");

		IndexWriter indexWriter = null;
		try {
			// MMAnalyzer (JE analyzer) tokenizes the text fields.
			MMAnalyzer mm = new MMAnalyzer();
			// create == false: append to an index that must already exist.
			indexWriter = new IndexWriter(indexDir, mm, false);
			indexWriter.setMergeFactor(100);
			indexWriter.setMaxBufferedDocs(500);
			indexWriter.setMaxFieldLength(5000);

			// Offset of the next page to request from the service.
			int begin = 0;
			// Number of documents added during this run.
			int indexed = 0;
			while (true) {
				// NOTE(review): the meaning of the "0" argument is defined by
				// findIndexRs and is not visible here - confirm with the service.
				List ps = AppContext.getInstance().getYhbdProblemService()
						.findIndexRs("0", begin, PAGE_SIZE);
				begin += PAGE_SIZE;

				// An empty (or missing) page means there is nothing left to index.
				if (ps == null || ps.isEmpty()) {
					System.out
							.println(" cause by items.size() is zero so the program is stop");
					break;
				}

				for (int i = 0; i < ps.size(); i++) {
					YhbdProblem p = (YhbdProblem) ps.get(i);
					System.out.println(p.getWtnr());

					indexWriter.addDocument(Document(p));
					indexed++;
				}
			}

			// Optimize once, after all pages, and only if something was added.
			if (indexed > 0) {
				indexWriter.optimize();
			}

		} catch (IOException e) {
			System.out.println(" caught a " + e.getClass()
					+ "\n with message: " + e.getMessage());
		} finally {
			closeWriter(indexWriter);
		}
	}

	/**
	 * Closes the writer if it was opened, reporting (not rethrowing) any
	 * failure in the same format used by {@link #buildIndex()}.
	 *
	 * @param indexWriter the writer to close; may be {@code null}
	 */
	private static void closeWriter(IndexWriter indexWriter) {
		if (indexWriter == null) {
			return;
		}
		try {
			indexWriter.close();
		} catch (IOException e) {
			System.out.println(" caught a " + e.getClass()
					+ "\n with message: " + e.getMessage());
		}
	}

	/**
	 * Converts one problem record into a Lucene document with three fields:
	 * {@code title} (stored, tokenized, with term vector), {@code content}
	 * (stored compressed, tokenized, with term vector) and {@code id}
	 * (stored only, not indexed).
	 *
	 * @author xuhaimin
	 * @param p the record to convert; its title, content and id are read via
	 *          {@code getWtbt()}, {@code getWtnr()} and {@code getWtbh()}
	 * @return the populated document
	 * @throws java.io.IOException declared for callers; kept for compatibility
	 */
	public static   Document Document(YhbdProblem p)

	throws java.io.IOException {
		System.out.println(p.getWtnr());

		Document doc = new Document();
		// Build the index fields from the values read from the database record.
		Field p_title = new Field("title", p.getWtbt(), Field.Store.YES,
				Field.Index.TOKENIZED, Field.TermVector.YES);

		Field p_content = new Field("content", p.getWtnr(),
				Field.Store.COMPRESS, Field.Index.TOKENIZED,
				Field.TermVector.YES);

		Field p_id = new Field("id", p.getWtbh().toString(), Field.Store.YES,
				Field.Index.NO);

		doc.add(p_title);
		doc.add(p_content);
		doc.add(p_id);

		return doc;

	}

}











以上是建立索引的代码,但是在下面这段循环中:
// Index every record of the current page.
for (int i = 0; i < ps.size(); i++) {

					// The list is untyped, so each element is cast to YhbdProblem.
					YhbdProblem p = (YhbdProblem) ps.get(i);
					// Debug output: print the value returned by getWtnr().
					System.out.println(p.getWtnr());

					/**
					 * Convert the record into a Lucene document via the
					 * Document() helper method.
					 */

					Document doc = Document(p);

					// Add the document to the index.
					indexWriter.addDocument(doc);

				}



第一次循环执行到
indexWriter.addDocument(doc);
时就内存溢出了。这是什么原因?请大家讨论一下。
   发表时间:2007-08-14  
估计是你的 JVM 内存设置太小。je 分词器在第一次分词的时候会载入词典,需要占用一定的内存,建议设置 -Xms128M -Xmx256M。
0 请登录后投票
   发表时间:2007-08-15  
je 分词的词典要占用 45M 左右的内存,默认开发环境的最大堆内存为 64M,没有通过 -Xmx 调大最大堆内存的话基本上会内存溢出。
0 请登录后投票
论坛首页 Java企业应用版

跳转论坛:
Global site tag (gtag.js) - Google Analytics