lucene搜索

yuxuguang

浏览: 140883 次
性别:
来自: 北京

最近访客更多访客>>

lgl4223939

AriesChan

暮霭_

code3lave

博主相关

博客

微博

相册

留言

关于我

文章分类

社区版块

存档分类

博客分类：

java

lucene 搜索引擎 Bean Apache quartz

这里做的lucene是根据一个表里的网址链接抓取网页生成索引。

定时任务（Spring Quartz）配置文件

<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE beans PUBLIC "-//SPRING//DTD BEAN//EN" "http://www.springframework.org/dtd/spring-beans.dtd">
<!-- blogindextop -->
<beans>
 <!-- Loads the timers: scheduler factory that registers the triggers listed below. -->
 <bean class="org.springframework.scheduling.quartz.SchedulerFactoryBean">
    <property name="triggers">     
      <list> 
     	<ref bean="TaskStatTrigger" />	
     	<!-- NOTE(review): TranslateCheckInfoTrigger is not defined in this excerpt. -->
     	<ref bean="TranslateCheckInfoTrigger" />		
      </list>
    </property>
  </bean>
  <!-- (Timer) Cron trigger that invokes translate() on the target service once a day. --> 
		<bean id="TaskStatTrigger" class="org.springframework.scheduling.quartz.CronTriggerBean">
		<property name="jobDetail">
			<bean class="org.springframework.scheduling.quartz.MethodInvokingJobDetailFactoryBean">
				<property name="targetObject" ref="TranslateLoadAttendanceInfoService" />
				<property name="targetMethod" value="translate" />
				<property name="concurrent" value="false" />
			</bean>
		</property>
		<!-- Cron fields, in order: second, minute, hour (24-hour clock), day-of-month,
			month, day-of-week and an optional year. For example, in "0 27 15 * * ?"
			0 is the second, 27 the minute and 15 the hour.
			? means "no specific value" (used in the day-of-month or day-of-week field);
			* means every value of the field, e.g. every month 1-12 in the month field.

			FRI is Friday, SUN is Sunday.
			Examples:
			   "0 15 10 * * ? *"     10:15 every day
			   "0 15 10 ? * MON-FRI" 10:15 Monday to Friday
			   "34 10 9 ? * SUN"     09:10:34 every Sunday
			   "34 10 9 5 * ?"       09:10:34 on the 5th of every month
			   "0/10 * * * * ?"      every 10 seconds
			The expression configured below, "0 00 00 * * ?", fires at 00:00:00 every day.
		 -->
		<property name="cronExpression" value="0 00 00 * * ?" />
	</bean>
	<!-- (Timer) Scheduled service that adds attendance personnel (per the original note).
	     It is the targetObject of TaskStatTrigger and inherits from the txProxyTemplate
	     parent bean, which is not shown in this excerpt. --> 
	<bean id="TranslateLoadAttendanceInfoService" parent="txProxyTemplate">
    	<property name="target">
      		<bean class="com.sdfxw.office.service.TranslateLoadAttendanceInfoServiceImp">
		        <property name="attendancelDAO">
		          <ref bean="AttendancelDAO" />
		        </property>
		        <property name="personNelInfoDAO">
		          <ref bean="PersonNelInfoDAO" />
		        </property>
		       <property name="attendanceRuleDAO">
					<ref bean="AttendanceRuleDAO" />
				</property>
      		</bean>
    	</property>
	</bean>

struts配置文件

 	 <!-- Search engine: maps /search to SearchAction (a DispatchAction). The handler
 	      method (search or custom) is presumably chosen by the "actionName" request
 	      parameter; each method forwards to one of the two views below. --> 
	<action parameter="actionName" path="/search" type="com.sdfxw.search.action.SearchAction" >
            <forward name="searchview"   path="/jsp/search/search.jsp" />
            <forward name="customview"   path="/jsp/search/custom.jsp" />
       </action>

建表语句

-- Create table
-- SEARCHLINK lists the pages to index: URLDao.getURL() selects every row and
-- SearchService.createIndex() fetches and indexes each URL.
create table SEARCHLINK
(
  -- Row identifier; URLDao passes "LINKID" as the third argument of
  -- insertMap/updateMap (presumably the key column).
  LINKID VARCHAR2(50) not null,
  -- Page address; SearchService prepends its indexPrefix before fetching it.
  URL    VARCHAR2(200),
  -- Model identifier stored with the link (see the MODEL_* constants in SearchService).
  MODEL  VARCHAR2(50)
)

先做action

package com.sdfxw.search.action;

import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;

import org.apache.struts.action.ActionForm;
import org.apache.struts.action.ActionForward;
import org.apache.struts.action.ActionMapping;
import org.apache.struts.actions.DispatchAction;
import org.carf.common.spring.ApplicationFactory;
import org.carf.util.common.ParameterUtil;
import org.carf.util.page.PageViewContext;

import com.sdfxw.search.service.SearchService;




/**
 * Struts dispatch action behind the site search pages.
 *
 * <p>Both handlers read the <code>keyword</code> and <code>page</code> request
 * parameters, run the query through the <code>SearchService</code> bean and
 * expose the resulting page as the request attribute <code>PP</code>.
 */
public class SearchAction extends DispatchAction
{
    /** Number of hits requested per result page. */
    private static final int PAGE_SIZE = 20;

    /**
     * Searches the page index on the <code>Content</code> field and forwards
     * to <code>searchview</code>.
     *
     * <p>When no keyword is supplied the query is skipped and the view is
     * rendered without a <code>PP</code> attribute.
     *
     * @throws Exception if the index cannot be read or the query cannot be parsed
     */
    public ActionForward search(ActionMapping mapping, ActionForm form,
            HttpServletRequest request, HttpServletResponse response) throws Exception
    {
        String keyword = ParameterUtil.getParameter(request, "keyword");
        String page = ParameterUtil.getParameter(request, "page");
        // A blank keyword would yield the unparsable query "Content:" (and a
        // null one would search for the literal word "null"), so skip the query.
        if (hasText(keyword))
        {
            SearchService service = (SearchService) ApplicationFactory
                    .getService("SearchService");
            String querystr = "Content:" + keyword;
            PageViewContext pp = service.query(querystr, PAGE_SIZE, page);
            request.setAttribute("PP", pp);
        }
        return mapping.findForward("searchview");
    }

    /**
     * Searches the custom index on the <code>Content</code> and
     * <code>Caption</code> fields and forwards to <code>customview</code>.
     *
     * <p>When no keyword is supplied the query is skipped and the view is
     * rendered without a <code>PP</code> attribute.
     *
     * @throws Exception if the index cannot be read or the query cannot be parsed
     */
    public ActionForward custom(ActionMapping mapping, ActionForm form,
            HttpServletRequest request, HttpServletResponse response) throws Exception
    {
        String keyword = ParameterUtil.getParameter(request, "keyword");
        String page = ParameterUtil.getParameter(request, "page");
        if (hasText(keyword))
        {
            SearchService service = (SearchService) ApplicationFactory
                    .getService("SearchService");
            // Group the keyword per field: without the parentheses only the first
            // word is bound to Content and only the last one to Caption.
            String querystr = "Content:(" + keyword + ") OR Caption:(" + keyword + ")";
            PageViewContext pp = service.querycustom(querystr, PAGE_SIZE, page);
            request.setAttribute("PP", pp);
        }
        return mapping.findForward("customview");
    }

    /** Returns true when the value contains at least one non-whitespace character. */
    private static boolean hasText(String value)
    {
        return value != null && value.trim().length() > 0;
    }
}

service代码（生成索引，搜索索引）

package com.sdfxw.search.service;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.methods.PostMethod;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.demo.html.HTMLParser;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.carf.util.common.WebFormatter;
import org.carf.util.page.PageViewContext;
import org.carf.util.page.PageViewUtil;
import org.springframework.core.io.Resource;

import com.sdfxw.search.dao.URLDao;

/**
 * Builds and queries the Lucene indexes behind the site search.
 *
 * <p>Two indexes are kept next to each other on disk: the page index at the
 * path of <code>indexDir</code>, filled from the URLs returned by
 * <code>URLDao</code>, and the custom index at that path plus
 * <code>_custom</code>. Each one is rebuilt in a sibling temp directory
 * (<code>_tmp</code> / <code>_customtmp</code>) and then renamed over the
 * live directory.
 */
public class SearchService
{
    // Model identifiers. Their business meaning is not visible in this class.
    public static final String MODEL_SAFE = "1";

    public static final String MODEL_EXPERT = "13";

    public static final String MODEL_PRODUCT = "2";

    public static final String MODEL_ING = "31";

    public static final String MODEL_CHANCE = "41";

    public static final String MODEL_STORY = "42";

    public static final String MODEL_ANGEL = "43";

    public static final String MODEL_MONTHLY = "53";

    public static final String MODEL_JOB = "75";

    public static final String MODEL_CUSTOM = "16";

    public static final String MODEL_MEDIUM = "76";

    public static final String MODEL_AGENCY = "77";

    public static final String MODEL_DOWNLOAD = "78";

    /** Location of the live page index; the other index paths are derived from it. */
    private Resource indexDir;

    /** Prefix prepended to every stored URL before the page is fetched. */
    private String indexPrefix;

    private URLDao urldao;

    /**
     * Query-syntax tokens escaped by {@link #EscapSpecialChar(String)}. The
     * backslash must stay first so that backslashes added for later tokens
     * are not escaped again.
     */
    private static final String[] specialChar = new String[] { "\\", "+", "-", "&&",
            "||", "!", "(", ")", "{", "}", "[", "]", "^", "\"", "~", "*", "?", ":" };

    /**
     * Runs a query against the page index.
     *
     * @param queryStr query in Lucene query syntax; <code>Content</code> is the default field
     * @param pageSize number of hits per page
     * @param pageNum  requested page, passed through to <code>PageViewUtil</code>
     * @return the requested page of hits
     * @throws IOException    if the index cannot be opened or read
     * @throws ParseException if <code>queryStr</code> is not a valid query
     */
    public PageViewContext query(String queryStr, int pageSize, String pageNum)
            throws IOException, ParseException
    {
        return search(indexDir.getFile().getPath(), queryStr, pageSize, pageNum);
    }

    /**
     * Runs a query against the custom index (the index path plus <code>_custom</code>).
     *
     * @param queryStr query in Lucene query syntax; <code>Content</code> is the default field
     * @param pageSize number of hits per page
     * @param pageNum  requested page, passed through to <code>PageViewUtil</code>
     * @return the requested page of hits
     * @throws IOException    if the index cannot be opened or read
     * @throws ParseException if <code>queryStr</code> is not a valid query
     */
    public PageViewContext querycustom(String queryStr, int pageSize, String pageNum)
            throws IOException, ParseException
    {
        return search(indexDir.getFile().getPath() + "_custom", queryStr, pageSize, pageNum);
    }

    /** Parses and runs the query on the index at indexPath, always closing the searcher. */
    private PageViewContext search(String indexPath, String queryStr, int pageSize,
            String pageNum) throws IOException, ParseException
    {
        IndexSearcher indexSearcher = new IndexSearcher(indexPath);
        try
        {
            QueryParser queryParser = new QueryParser("Content", new StandardAnalyzer());
            Query query = queryParser.parse(queryStr);
            Hits hits = indexSearcher.search(query);
            // The page is built while the searcher is still open.
            return PageViewUtil.getPageViewContext(pageSize, pageNum, hits);
        }
        finally
        {
            // Also reached when parsing or searching throws, so the index
            // files are never left open.
            indexSearcher.close();
        }
    }

    /**
     * Rebuilds the page index from every row returned by <code>URLDao.getURL()</code>.
     *
     * <p>Pages are indexed into the <code>_tmp</code> directory, which then
     * replaces the live index. A failure on a single page is printed and the
     * remaining pages are still processed. The live index is left untouched
     * when no temp index was produced.
     */
    public void createIndex()
    {
        List list = urldao.getURL();
        File liveDir;
        File tmpDir;
        try
        {
            liveDir = indexDir.getFile();
            tmpDir = new File(liveDir.getPath() + "_tmp");
            // A temp index left behind by a failed swap would otherwise be
            // appended to, duplicating every document.
            if (tmpDir.exists())
            {
                FileUtils.forceDelete(tmpDir);
            }
        }
        catch (Exception e)
        {
            e.printStackTrace(System.out);
            return;
        }
        for (int i = 0; i < list.size(); i++)
        {
            Map map = (Map) list.get(i);
            String urlstr = (String) map.get("URL");
            String model = (String) map.get("Model");
            try
            {
                createOneItem(urlstr, model);
            }
            catch (Exception e)
            {
                e.printStackTrace(System.out);
            }
        }
        swapIndex(tmpDir, liveDir);
    }

    /**
     * Replaces the live index directory with the freshly built one. Nothing is
     * deleted unless the fresh directory really contains an index.
     */
    private void swapIndex(File freshDir, File liveDir)
    {
        if (!indexExist(freshDir.getPath()))
        {
            System.out.println("No index at " + freshDir.getPath() + ", keeping "
                    + liveDir.getPath());
            return;
        }
        try
        {
            if (liveDir.exists())
            {
                FileUtils.forceDelete(liveDir);
            }
        }
        catch (IOException e)
        {
            // Best effort, as before: the rename below is still attempted.
            e.printStackTrace(System.out);
        }
        if (!freshDir.renameTo(liveDir))
        {
            System.out.println("Could not rename " + freshDir.getPath() + " to "
                    + liveDir.getPath());
        }
    }

    /**
     * Fetches one page and appends it to the temp page index.
     *
     * <p>The document carries the stored fields URL, Title, Model and Summary
     * (first 200 characters of the whitespace-stripped text) plus the
     * unstored, tokenized Content field. Fetch and parse failures are printed
     * and the page is skipped.
     *
     * NOTE(review): the writer is opened, optimized and closed once per page,
     * which is slow on large link tables.
     *
     * @param urlstr page path; a leading slash is added and backslashes are normalised
     * @param model  model identifier stored with the document
     * @throws Exception if the temp index cannot be opened or closed
     */
    private void createOneItem(String urlstr, String model) throws Exception
    {
        Analyzer luceneAnalyzer = new StandardAnalyzer();
        String path = indexDir.getFile().getPath() + "_tmp";
        // Only the first page of a run creates the index; later pages append.
        boolean create = !indexExist(path);

        IndexWriter indexWriter = new IndexWriter(path, luceneAnalyzer, create);
        try
        {
            indexWriter.setMergeFactor(1500);
            Document doc = new Document();

            if (!urlstr.startsWith("\\") && !urlstr.startsWith("/"))
                urlstr = "/" + urlstr;
            urlstr = StringUtils.replace(urlstr, "\\", "/");

            Field f_url = new Field("URL", urlstr, Field.Store.YES,
                    Field.Index.UN_TOKENIZED);
            try
            {
                // One request serves both the text extraction and the title parser.
                byte[] page = fetchPage(urlstr);
                // Decoded with the platform default charset, as before.
                String content = new String(page);
                content = WebFormatter.html2text(content);
                content = StringUtils.deleteWhitespace(content);
                String summary = StringUtils.abbreviate(content, 200);

                HTMLParser parser = new HTMLParser(new java.io.ByteArrayInputStream(page));
                Field f_title = new Field("Title", parser.getTitle(), Field.Store.YES,
                        Field.Index.TOKENIZED);

                Field f_model = new Field("Model", model, Field.Store.YES,
                        Field.Index.UN_TOKENIZED);
                Field f_summary = new Field("Summary", summary, Field.Store.YES,
                        Field.Index.UN_TOKENIZED);

                Field f_content = new Field("Content", content, Field.Store.NO,
                        Field.Index.TOKENIZED);
                doc.add(f_url);
                doc.add(f_title);
                // The Model field was built but never added, so it was missing
                // from every indexed document.
                doc.add(f_model);
                doc.add(f_summary);
                doc.add(f_content);
                indexWriter.addDocument(doc);
                indexWriter.optimize();
            }
            catch (Exception e)
            {
                e.printStackTrace(System.out);
            }
        }
        finally
        {
            indexWriter.close();
        }
    }

    /**
     * Downloads the page at <code>indexPrefix + urlstr</code> and returns its
     * raw bytes. The request is a POST, as in the original code. The
     * connection is always released.
     */
    private byte[] fetchPage(String urlstr) throws Exception
    {
        HttpClient httpclient = new HttpClient();
        System.out.println(indexPrefix + urlstr);

        PostMethod httppost = new PostMethod(indexPrefix + urlstr);
        try
        {
            httpclient.executeMethod(httppost);
            InputStream is = httppost.getResponseBodyAsStream();
            return IOUtils.toByteArray(is);
        }
        finally
        {
            httppost.releaseConnection();
        }
    }

    /** Tells whether a Lucene index exists in the given directory. */
    public boolean indexExist(String indexDir)
    {
        return IndexReader.indexExists(indexDir);
    }

    /** Backslash-escapes every token of <code>specialChar</code> found in the string. */
    private String EscapSpecialChar(String str)
    {
        for (int i = 0; i < specialChar.length; i++)
        {
            // StringUtils.replace returns a new string; the result used to be
            // discarded, so nothing was ever escaped.
            str = StringUtils.replace(str, specialChar[i], "\\" + specialChar[i]);
        }
        return str;
    }

    /**
     * Builds a phrase clause of the form <code>fName:"fValue"</code>, with the
     * query-syntax characters in the value escaped.
     */
    public String genFiled(String fName, String fValue)
    {
        String str = fName + ":\"" + EscapSpecialChar(fValue) + "\"";
        return str;
    }

    public String getIndexPrefix()
    {
        return indexPrefix;
    }

    public void setIndexPrefix(String indexPrefix)
    {
        this.indexPrefix = indexPrefix;
    }

    public URLDao getUrldao()
    {
        return urldao;
    }

    public void setUrldao(URLDao urldao)
    {
        this.urldao = urldao;
    }

    public void setIndexDir(Resource indexDir)
    {
        this.indexDir = indexDir;
    }

    /** Inserts a new link row with the given model identifier and URL. */
    public void insertSearchLink(String modelID, String url)
    {
        Map map = new HashMap();
        map.put("Model", modelID);
        map.put("URL", url);
        this.urldao.insert(map);
    }

    /** Deletes every link row of the given model. */
    public void deleteSearchLink(String modelID)
    {
        this.urldao.deleteByModelID(modelID);
    }

    /**
     * Rebuilds the custom index from the given rows.
     *
     * <p>Each element is a <code>Map</code> read through the keys
     * <code>ID</code>, <code>Caption</code>, <code>Content</code> and
     * <code>IssueDate</code>. Of the issue date only the first 10 characters
     * of its string form are stored. The index is built in the
     * <code>_customtmp</code> directory and then swapped over <code>_custom</code>.
     *
     * @param list rows to index
     * @throws Exception if the temp index cannot be written
     */
    public void createCustomIndex(List list) throws Exception
    {
        Analyzer luceneAnalyzer = new StandardAnalyzer();
        File tmpDir = new File(indexDir.getFile().getPath() + "_customtmp");
        File liveDir = new File(indexDir.getFile().getPath() + "_custom");
        // Start from an empty temp index: leftovers of a failed swap would
        // otherwise be appended to and duplicate every document.
        if (tmpDir.exists())
        {
            FileUtils.forceDelete(tmpDir);
        }

        IndexWriter indexWriter = new IndexWriter(tmpDir.getPath(), luceneAnalyzer, true);
        try
        {
            indexWriter.setMergeFactor(1500);
            for (int i = 0; i < list.size(); i++)
            {
                Map map = (Map) list.get(i);
                String caption = (String) map.get("Caption");
                Object issueDate = map.get("IssueDate");
                String issueDateStr = "";
                if (issueDate != null)
                {
                    String text = issueDate.toString();
                    // Keep the first 10 characters, tolerating shorter values.
                    issueDateStr = text.length() > 10 ? text.substring(0, 10) : text;
                }
                String id = (String) map.get("ID");
                String content = (String) map.get("Content");
                Document doc = new Document();

                doc.add(new Field("ID", id, Field.Store.YES,
                        Field.Index.UN_TOKENIZED));
                doc.add(new Field("Caption", caption, Field.Store.YES,
                        Field.Index.TOKENIZED));
                doc.add(new Field("Content", content, Field.Store.NO,
                        Field.Index.TOKENIZED));
                doc.add(new Field("IssueDate", issueDateStr, Field.Store.YES,
                        Field.Index.UN_TOKENIZED));
                indexWriter.addDocument(doc);
            }
            // Optimize once at the end instead of after every document.
            indexWriter.optimize();
        }
        finally
        {
            indexWriter.close();
        }
        swapIndex(tmpDir, liveDir);
    }

    /**
     * Returns every link row.
     *
     * NOTE(review): the url argument is ignored and all rows are returned;
     * confirm against the callers whether a lookup by URL was intended.
     */
    public List getURL(String url)
    {
        return urldao.getURL();
    }

    /** Updates a link row through <code>URLDao.update</code>. */
    public void update(Map map)
    {
        urldao.update(map);
    }

    /**
     * Inserts a link row for the URL when none exists and a model is given;
     * otherwise updates the first existing row with the URL.
     */
    public void insertOrupdateByUrl(String url, String model)
    {
        // Look the URL up as-is. The former '||chr(38)||' encoding of '&' was
        // neutralised by the DAO's quote escaping, so URLs containing '&' were
        // never found and got inserted again on every call.
        List results = urldao.getURL(url);
        if ((results == null || results.size() == 0) && StringUtils.isNotBlank(model))
        {
            Map map = new HashMap();
            map.put("URL", url);
            map.put("MODEL", model);
            urldao.insert(map);
        }
        else if (results != null && results.size() > 0 && StringUtils.isNotBlank(url))
        {
            Map map = (Map) results.get(0);
            map.put("URL", url);
            urldao.update(map);
        }
    }

    /** Deletes the link rows whose URL equals the given one. */
    public void deleteByUrl(String url)
    {
        // URLDao.deleteByUrl places the value between single quotes in its SQL
        // text, so this turns '&' into a chr(38) concatenation.
        url = StringUtils.replace(url, "&", "'||chr(38)||'");
        urldao.deleteByUrl(url);
    }
}

dao层代码

package com.sdfxw.search.dao;

import java.util.List;
import java.util.Map;

import org.apache.commons.lang.StringEscapeUtils;
import org.carf.common.jdbc.JdbcDaoSupportEx;
import org.carf.common.jdbc.JdbcTemplateEx;

public class URLDao extends JdbcDaoSupportEx 
{
	public List getURL()
	{
		JdbcTemplateEx template = this.getJdbcTemplate();
		String sql = "select * from SEARCHLINK";
		return template.queryForList(sql);
	}
	
	public List getURL(String url)
	{
		JdbcTemplateEx template = this.getJdbcTemplate();
		String sql = "select * from SEARCHLINK where URL ='"+ StringEscapeUtils.escapeSql(url) +"'";
		return template.queryForList(sql);
	}
	
	public void insert(Map map)
	{
		JdbcTemplateEx template = this.getJdbcTemplate();
		map.put("LINKID", "-2");
		template.insertMap(map, "SEARCHLINK", "LINKID");
	}
	
	public void update(Map map)
	{
		JdbcTemplateEx template = this.getJdbcTemplate();
		template.updateMap(map, "SEARCHLINK", "LINKID");
	}
	
	public void deleteByUrl(String url)
	{
		JdbcTemplateEx template = this.getJdbcTemplate();
		String sql = "DELETE FROM SEARCHLINK WHERE URL='"+url + "'";
		template.update(sql);
	}
	
	public void deleteByModelID(String modelID)
	{
		JdbcTemplateEx template = this.getJdbcTemplate();
		String sql = "DELETE FROM SEARCHLINK WHERE Model=?";
		template.update(sql, new Object[] { modelID });
	}
}

分享到：

Java 的 HTTP 文件队列下载（读取流） | js 精确加减乘除运算

2010-01-06 10:04
浏览 1354
评论(0)
分类:编程语言
查看更多

发表评论

您还没有登录,请您登录后再发表评论

最近访客更多访客>>

博主相关

文章分类

社区版块

存档分类

最新评论

lucene搜索

评论

发表评论

相关推荐

最近访客 更多访客>>

博主相关

文章分类

社区版块

存档分类

最新评论

lucene搜索

评论

发表评论

相关推荐

ArrayList扩容问题

通过反射获取对象的值

java截取中英文混杂字符串

XML特殊字符转义

java正则表达式

jdbc连接数据库（转）

读取文件夹下所有文件

使用Eclipse的快捷方式

多Tomcat配置启动项

Java 的 HTTP 文件队列下载（读取流）

poi导入excel

时间类型比较

java取得几个月后时间

得到一周开始时间和结束时间

最近访客更多访客>>