`
yuxuguang
  • 浏览: 139212 次
  • 性别: Icon_minigender_1
  • 来自: 北京
社区版块
存档分类
最新评论

lucene搜索

    博客分类:
  • java
阅读更多

这里做的lucene是根据一个表里的网址链接抓取网页生成索引。

定时任务(Spring + Quartz)配置文件(示例中定时执行的是考勤服务的 translate 方法)

<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE beans PUBLIC "-//SPRING//DTD BEAN//EN" "http://www.springframework.org/dtd/spring-beans.dtd">
<!-- blogindextop -->
<beans>
 <!-- Scheduler factory: registers the triggers listed below.
      NOTE(review): TranslateCheckInfoTrigger is referenced here but not
      defined in this excerpt; it must be declared elsewhere. -->
 <bean class="org.springframework.scheduling.quartz.SchedulerFactoryBean">
    <property name="triggers">     
      <list> 
     	<ref bean="TaskStatTrigger" />	
     	<ref bean="TranslateCheckInfoTrigger" />		
      </list>
    </property>
  </bean>
  <!-- Cron trigger: invokes translate() on TranslateLoadAttendanceInfoService
       once a day; concurrent is set to false on the job detail. --> 
		<bean id="TaskStatTrigger" class="org.springframework.scheduling.quartz.CronTriggerBean">
		<property name="jobDetail">
			<bean class="org.springframework.scheduling.quartz.MethodInvokingJobDetailFactoryBean">
				<property name="targetObject" ref="TranslateLoadAttendanceInfoService" />
				<property name="targetMethod" value="translate" />
				<property name="concurrent" value="false" />
			</bean>
		</property>
		<!-- Quartz cron fields, in order: second, minute, hour, day of month,
			month, day of week, optional year.
			"?" means no specific value for that field; "*" means every value
			(in the month field: every month from 1 to 12).
			MON-FRI is Monday through Friday; SUN is Sunday.

			Examples:
			   "0 15 10 * * ? *"     every day at 10:15
			   "0 15 10 ? * MON-FRI" Monday to Friday at 10:15
			   "34 10 9 ? * SUN"     every Sunday at 09:10:34
			   "34 10 9 5 * ?"       the 5th of every month at 09:10:34
			   "0/10 * * * * ?"      every 10 seconds

			The expression below fires every day at 00:00:00.
		 -->
		<property name="cronExpression" value="0 00 00 * * ?" />
	</bean>
	<!-- Target of the scheduled job: the attendance loading service, declared
	     with txProxyTemplate as its parent bean and wired with the three DAOs
	     it depends on. --> 
	<bean id="TranslateLoadAttendanceInfoService" parent="txProxyTemplate">
    	<property name="target">
      		<bean class="com.sdfxw.office.service.TranslateLoadAttendanceInfoServiceImp">
		        <property name="attendancelDAO">
		          <ref bean="AttendancelDAO" />
		        </property>
		        <property name="personNelInfoDAO">
		          <ref bean="PersonNelInfoDAO" />
		        </property>
		       <property name="attendanceRuleDAO">
					<ref bean="AttendanceRuleDAO" />
				</property>
      		</bean>
    	</property>
	</bean> 

 struts配置文件

	 <!-- Search engine: /search is handled by SearchAction; the actionName
	      request parameter is the dispatch parameter, and the two forwards
	      are the result pages for the page search and the custom search. --> 
	<action parameter="actionName" path="/search" type="com.sdfxw.search.action.SearchAction" >
            <forward name="searchview"   path="/jsp/search/search.jsp" />
            <forward name="customview"   path="/jsp/search/custom.jsp" />
       </action> 
 

 

建表语句

-- SEARCHLINK: one row per page address that the search indexer fetches.
create table SEARCHLINK
(
  -- Row identifier; URLDao names LINKID as the key column for insertMap/updateMap.
  LINKID VARCHAR2(50) not null,
  -- Page address; SearchService prepends its configured indexPrefix before fetching it.
  URL    VARCHAR2(200),
  -- Module code of the page (presumably one of SearchService's MODEL_* values; confirm).
  MODEL  VARCHAR2(50)
)
 

 

 

先做action

package com.sdfxw.search.action;

import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;

import org.apache.struts.action.ActionForm;
import org.apache.struts.action.ActionForward;
import org.apache.struts.action.ActionMapping;
import org.apache.struts.actions.DispatchAction;
import org.carf.common.spring.ApplicationFactory;
import org.carf.util.common.ParameterUtil;
import org.carf.util.page.PageViewContext;

import com.sdfxw.search.service.SearchService;




/**
 * Struts dispatch action mapped to {@code /search}. Each public method runs
 * one kind of Lucene query through {@link SearchService} and publishes the
 * resulting page under the {@code PP} request attribute.
 */
public class SearchAction extends DispatchAction
{
    /** Request attribute that carries the result page to the JSP. */
    private static final String RESULT_ATTRIBUTE = "PP";

    /** Number of hits requested per result page. */
    private static final int HITS_PER_PAGE = 20;

    /**
     * Searches the page index for the {@code keyword} request parameter in
     * the {@code Content} field and forwards to {@code searchview}.
     * <p>
     * NOTE(review): the keyword is not checked for blank input before the
     * query string is built; the check was left disabled in the original.
     *
     * @throws Exception whatever the service raises while parsing or searching
     */
    public ActionForward search(ActionMapping mapping, ActionForm form,
            HttpServletRequest request, HttpServletResponse response) throws Exception
    {
        final String term = ParameterUtil.getParameter(request, "keyword");
        final String pageNumber = ParameterUtil.getParameter(request, "page");

        final PageViewContext resultPage =
                lookupService().query("Content:" + term, HITS_PER_PAGE, pageNumber);

        request.setAttribute(RESULT_ATTRIBUTE, resultPage);
        return mapping.findForward("searchview");
    }

    /**
     * Searches the custom index for the {@code keyword} request parameter in
     * either the {@code Content} or the {@code Caption} field and forwards
     * to {@code customview}.
     *
     * @throws Exception whatever the service raises while parsing or searching
     */
    public ActionForward custom(ActionMapping mapping, ActionForm form,
            HttpServletRequest request, HttpServletResponse response) throws Exception
    {
        final String term = ParameterUtil.getParameter(request, "keyword");
        final String pageNumber = ParameterUtil.getParameter(request, "page");

        final StringBuffer expression = new StringBuffer();
        expression.append("Content:").append(term);
        expression.append(" OR Caption:").append(term);

        final PageViewContext resultPage =
                lookupService().querycustom(expression.toString(), HITS_PER_PAGE, pageNumber);

        request.setAttribute(RESULT_ATTRIBUTE, resultPage);
        return mapping.findForward("customview");
    }

    /** Fetches the {@code SearchService} bean from the application factory. */
    private static SearchService lookupService()
    {
        return (SearchService) ApplicationFactory.getService("SearchService");
    }
}
 

service代码(生成索引,搜索索引)

package com.sdfxw.search.service;

import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.methods.PostMethod;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.demo.html.HTMLParser;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.carf.util.common.WebFormatter;
import org.carf.util.page.PageViewContext;
import org.carf.util.page.PageViewUtil;
import org.springframework.core.io.Resource;

import com.sdfxw.search.dao.URLDao;

public class SearchService
{
    public final String MODEL_SAFE = "1";

    public final String MODEL_EXPERT = "13";

    public final String MODEL_PRODUCT = "2";

    public final String MODEL_ING = "31";

    public final String MODEL_CHANCE = "41";

    public final String MODEL_STORY = "42";

    public final String MODEL_ANGEL = "43";

    public final String MODEL_MONTHLY = "53";

    public final String MODEL_JOB = "75";

    public final String MODEL_CUSTOM = "16";
    
    public final String MODEL_MEDIUM = "76";
    
    public final String MODEL_AGENCY = "77";
    
    public final String MODEL_DOWNLOAD = "78";

    private Resource indexDir;

    private String indexPrefix;    

    private URLDao urldao;

    private static final String[] specialChar = new String[] { "\\", "+", "-", "&&",
            "||", "!", "(", ")", "{", "}", "[", "]", "^", "\"", "~", "*", "?", ":" };

    public PageViewContext query(String queryStr, int pageSize, String pageNum)
            throws IOException, ParseException
    {
        IndexSearcher indexSearcher = new IndexSearcher(indexDir.getFile().getPath());
        QueryParser queryParser = new QueryParser("Content", new StandardAnalyzer());
        Query query = queryParser.parse(queryStr);
//        Query query = MultiFieldQueryParser.parse(new String[]{"Content","Model"}, new String[]{queryStr,MODEL_SAFE}, new StandardAnalyzer());

//        BooleanQuery query2 = new BooleanQuery();
//        query2.add(arg0, arg1)
        
        Hits hits = indexSearcher.search(query);
        PageViewContext pp = PageViewUtil.getPageViewContext(pageSize, pageNum, hits);
        indexSearcher.close();
        return pp;
    }

    public PageViewContext querycustom(String queryStr, int pageSize, String pageNum)
            throws IOException, ParseException
    {
        IndexSearcher indexSearcher = new IndexSearcher(indexDir.getFile().getPath()
                + "_custom");
        QueryParser queryParser = new QueryParser("Content", new StandardAnalyzer());
        Query query = queryParser.parse(queryStr);
        Hits hits = indexSearcher.search(query);
        PageViewContext pp = PageViewUtil.getPageViewContext(pageSize, pageNum, hits);
        indexSearcher.close();
        return pp;
    }

    public void createIndex()
    {
        List list = urldao.getURL();
        for (int i = 0; i < list.size(); i++)
        {
            Map map = (Map) list.get(i);
            String urlstr = (String) map.get("URL");
            String model = (String) map.get("Model");
            try
            {
                createOneItem(urlstr, model);
            }
            catch (Exception e)
            {
                e.printStackTrace(System.out);
            }
        }
        try
        {
            String path = indexDir.getFile().getPath();
            File f = new File(path + "_tmp");
            try
            {
                FileUtils.forceDelete(indexDir.getFile());
            }
            catch (Exception ee)
            {
            }
            f.renameTo(indexDir.getFile());
        }
        catch (Exception e)
        {
            e.printStackTrace(System.out);
        }
    }

    private void createOneItem(String urlstr, String model) throws Exception
    {
        Analyzer luceneAnalyzer = new StandardAnalyzer();
        String path = indexDir.getFile().getPath() + "_tmp";
        boolean create = !indexExist(path);

        IndexWriter indexWriter = new IndexWriter(path, luceneAnalyzer, create);
        try
        {
            indexWriter.setMergeFactor(1500);
            Document doc = new Document();

            if (!urlstr.startsWith("\\") && !urlstr.startsWith("/"))
                urlstr = "/" + urlstr;
            urlstr = StringUtils.replace(urlstr, "\\", "/");

            Field f_url = new Field("URL", urlstr, Field.Store.YES,
                    Field.Index.UN_TOKENIZED);
            try
            {
                InputStream is = getHtmlStream(urlstr);
                String content = IOUtils.toString(is);
                content = WebFormatter.html2text(content);
                content = StringUtils.deleteWhitespace(content);
                String summary = StringUtils.abbreviate(content, 200); 

                is = getHtmlStream(urlstr);
                HTMLParser parser = new HTMLParser(is);
                Field f_title = new Field("Title", parser.getTitle(), Field.Store.YES,
                        Field.Index.TOKENIZED);

                Field f_model = new Field("Model", model, Field.Store.YES,
                        Field.Index.UN_TOKENIZED);
                Field f_summary = new Field("Summary", summary, Field.Store.YES,
                        Field.Index.UN_TOKENIZED);

                Field f_content = new Field("Content", content, Field.Store.NO,
                        Field.Index.TOKENIZED);
                doc.add(f_url);
                doc.add(f_title);
                doc.add(f_summary);
                doc.add(f_content);
                indexWriter.addDocument(doc);
                indexWriter.optimize();
            }
            catch (Exception e)
            {
                e.printStackTrace(System.out);
            }
        }
        finally
        {
            indexWriter.close();
        }
        Thread t = Thread.currentThread();
    }

    private InputStream getHtmlStream(String urlstr) throws Exception
    {
        HttpClient httpclient = new HttpClient();
        System.out.println(indexPrefix + urlstr);
        
        PostMethod httppost = new PostMethod(indexPrefix + urlstr);
        httpclient.executeMethod(httppost);
        InputStream is = httppost.getResponseBodyAsStream();
        return is;
    }

    public boolean indexExist(String indexDir)
    {
        return IndexReader.indexExists(indexDir);
    }

    private String EscapSpecialChar(String str)
    {
        for (int i = 0; i < specialChar.length; i++)
        {
            StringUtils.replace(str, specialChar[i], "\\" + specialChar[i]);
        }
        return str;
    }

    public String genFiled(String fName, String fValue)
    {
        String str = fName + ":\"" + EscapSpecialChar(fValue) + "\"";
        return str;
    }

    public String getIndexPrefix()
    {
        return indexPrefix;
    }

    public void setIndexPrefix(String indexPrefix)
    {
        this.indexPrefix = indexPrefix;
    }  

    public URLDao getUrldao()
    {
        return urldao;
    }

    public void setUrldao(URLDao urldao)
    {
        this.urldao = urldao;
    }

    public void setIndexDir(Resource indexDir)
    {
        this.indexDir = indexDir;
    }

    public void insertSearchLink(String modelID, String url)
    {
        Map map = new HashMap();
        map.put("Model", modelID);
        map.put("URL", url);
        this.urldao.insert(map);
    }

    public void deleteSearchLink(String modelID)
    {
        this.urldao.deleteByModelID(modelID);
    }

    public void createCustomIndex(List list) throws Exception
    {
        Analyzer luceneAnalyzer = new StandardAnalyzer();
        String temppath = indexDir.getFile().getPath() + "_customtmp";
        String path = indexDir.getFile().getPath() + "_custom";
        boolean create = !indexExist(temppath);

        IndexWriter indexWriter = new IndexWriter(temppath, luceneAnalyzer, create);
        try
        {
            indexWriter.setMergeFactor(1500);
            for (int i = 0; i < list.size(); i++)
            {

                Map map = (Map) list.get(i);
                String Caption = (String) map.get("Caption");
                Object IssueDate = map.get("IssueDate");
                String IssueDateStr = "";
                if (IssueDate != null)
                {
                    IssueDateStr = IssueDate.toString().substring(0, 10);
                }
                else
                {
                    IssueDateStr = "";
                }
                String ID = (String) map.get("ID");
                String Content = (String) map.get("Content");// 内容
                Document doc = new Document();

                Field f_ID = new Field("ID", ID, Field.Store.YES,
                        Field.Index.UN_TOKENIZED);
                Field f_Caption = new Field("Caption", Caption, Field.Store.YES,
                        Field.Index.TOKENIZED);
                Field f_content = new Field("Content", Content, Field.Store.NO,
                        Field.Index.TOKENIZED);
                Field f_IssueDate = new Field("IssueDate", IssueDateStr, Field.Store.YES,
                        Field.Index.UN_TOKENIZED);
                doc.add(f_ID);
                doc.add(f_Caption);
                doc.add(f_content);
                doc.add(f_IssueDate);
                indexWriter.addDocument(doc);
                indexWriter.optimize();
            }
        }
        finally
        {
            indexWriter.close();
        }
        try
        {
            File ftemp = new File(temppath);
            File f = new File(path);
            try
            {
                FileUtils.forceDelete(f);
            }
            catch (Exception ee)
            {
            }
            ftemp.renameTo(f);
        }
        catch (Exception e)
        {
            e.printStackTrace(System.out);
        }
    }
    
    public List getURL(String url)
    {
    	return urldao.getURL();
    }
    
    public void update(Map map)
    {
    	urldao.update(map);
    }
    
    public void insertOrupdateByUrl(String url,String model)
    {
    	//String searchurl = url.replace("&", "'||chr(38)||'");
    	String searchurl = StringUtils.replace(url, "&", "'||chr(38)||'");
    	List results = urldao.getURL(searchurl);
    	if((results == null || results.size()==0) && StringUtils.isNotBlank(model))
    	{
    		Map map = new HashMap();
    		map.put("URL", url);
    		map.put("MODEL", model);
    		urldao.insert(map);
    	}else if(results != null && results.size()>0 && StringUtils.isNotBlank(url)){
    		Map map = (Map)results.get(0);
    		map.put("URL", url);
    		urldao.update(map);
    	}
    }
    
    public void deleteByUrl(String url)
    {
    	url = StringUtils.replace(url, "&", "'||chr(38)||'");
    	urldao.deleteByUrl(url);
    }
}

 dao层代码

package com.sdfxw.search.dao;

import java.util.List;
import java.util.Map;

import org.apache.commons.lang.StringEscapeUtils;
import org.carf.common.jdbc.JdbcDaoSupportEx;
import org.carf.common.jdbc.JdbcTemplateEx;

public class URLDao extends JdbcDaoSupportEx 
{
	public List getURL()
	{
		JdbcTemplateEx template = this.getJdbcTemplate();
		String sql = "select * from SEARCHLINK";
		return template.queryForList(sql);
	}
	
	public List getURL(String url)
	{
		JdbcTemplateEx template = this.getJdbcTemplate();
		String sql = "select * from SEARCHLINK where URL ='"+ StringEscapeUtils.escapeSql(url) +"'";
		return template.queryForList(sql);
	}
	
	public void insert(Map map)
	{
		JdbcTemplateEx template = this.getJdbcTemplate();
		map.put("LINKID", "-2");
		template.insertMap(map, "SEARCHLINK", "LINKID");
	}
	
	public void update(Map map)
	{
		JdbcTemplateEx template = this.getJdbcTemplate();
		template.updateMap(map, "SEARCHLINK", "LINKID");
	}
	
	public void deleteByUrl(String url)
	{
		JdbcTemplateEx template = this.getJdbcTemplate();
		String sql = "DELETE FROM SEARCHLINK WHERE URL='"+url + "'";
		template.update(sql);
	}
	
	public void deleteByModelID(String modelID)
	{
		JdbcTemplateEx template = this.getJdbcTemplate();
		String sql = "DELETE FROM SEARCHLINK WHERE Model=?";
		template.update(sql, new Object[] { modelID });
	}
}
分享到:
评论

相关推荐

    Lucene搜索引擎开发权威经典(附盘源码)【于天恩】.zip

    《Lucene搜索引擎开发权威经典》是由于天恩编著的一本深入探讨Lucene搜索引擎开发的专业书籍,这本书结合源码分析,旨在帮助读者掌握Lucene的核心技术和应用实践。Lucene是Apache软件基金会的一个开放源代码项目,它...

    Lucene搜索引擎 JSP + JAVA

    **Lucene搜索引擎 JSP + JAVA** Lucene是一个高性能、全文本搜索库,由Apache软件基金会开发,它提供了索引和搜索大量文本数据的能力。在这个项目中,Lucene被结合了JSP(JavaServer Pages)和JAVA技术,创建了一个...

    LUCENE搜索引擎基本工作原理

    **LUCENE搜索引擎基本工作原理** Lucene是一个开源的全文搜索引擎库,被广泛应用于构建复杂的搜索引擎系统。它的设计目标是高效、灵活且可扩展。理解Lucene的工作原理有助于开发人员更好地利用这一强大的工具。 **...

    Lucene搜索引擎开发权威经典随书资源1-6章

    本书基于Lucene的当前最新版本(2.1)精解了Lucene搜索引擎的相关知识,从基础知识到应用开发,精炼简洁,恰到好处。  本书包含了必要的理论,但以实践为主。所讲的理论都不是纸上谈兵,都是可以立即付诸实践进行...

    Lucene搜索-引擎开发权威经典pdf+源码第二部分

    《Lucene搜索-引擎开发权威经典》是一本深入解析Apache Lucene搜索引擎库的专业书籍,它为读者提供了构建高效全文搜索引擎的全面指南。Lucene是Java领域最著名的全文检索库,被广泛应用于各种信息检索系统中,包括...

    [Lucene搜索引擎开发权威经典].zip

    《Lucene搜索引擎开发权威经典》是一本深入探讨Apache Lucene的专著,作者于天恩在书中详尽地阐述了Lucene的核心概念、工作原理以及实际应用。这本书旨在帮助读者理解如何利用Lucene构建高性能、可扩展的全文搜索...

    基于lucene搜索引擎的java源码

    **基于Lucene搜索引擎的Java源码详解** Lucene是一个高性能、全文检索库,它由Apache软件基金会开发并维护。此Java源码包提供了一个全面的示例,展示了如何利用Lucene进行索引创建、更新(增量索引)以及搜索操作。...

    lucene搜索引擎项目

    《深入理解Lucene搜索引擎项目》 Lucene是一个高性能、全文本搜索库,它为开发者提供了在Java应用程序中实现全文检索的工具集。这个名为“lucene搜索引擎项目”的资源,旨在帮助用户更好地理解和应用Lucene来构建...

    lucene 搜索中文PDF文档

    **正文** ...总结,利用Lucene搜索中文PDF文档涉及多个技术层面,包括中文分词、PDF解析、索引构建、搜索执行和性能优化。通过理解这些关键技术,开发者可以构建出高效、准确的中文PDF文档检索系统。

    Lucene4.X实战类baidu搜索的大型文档海量搜索系统-10.Lucene搜索深入实战2 共11页.pptx

    【课程大纲】01.Lucene4入门精通实战课程-概述 共23页02.Lucene系统架构 共16页03.Lucene索引里有什么 共17页04.Lucene索引深入 共24页05.Lucene索引深入优化 共10页06.Lucene索引搜索 共13页07.Lucene搜索实战1 共4...

    Lucene4.X实战类baidu搜索的大型文档海量搜索系统-09.Lucene搜索深入实战1 共5页.pptx

    【课程大纲】01.Lucene4入门精通实战课程-概述 共23页02.Lucene系统架构 共16页03.Lucene索引里有什么 共17页04.Lucene索引深入 共24页05.Lucene索引深入优化 共10页06.Lucene索引搜索 共13页07.Lucene搜索实战1 共4...

    Lucene4.X实战类baidu搜索的大型文档海量搜索系统-13.Lucene搜索深入实战进阶3 共5页.pptx

    【课程大纲】01.Lucene4入门精通实战课程-概述 共23页02.Lucene系统架构 共16页03.Lucene索引里有什么 共17页04.Lucene索引深入 共24页05.Lucene索引深入优化 共10页06.Lucene索引搜索 共13页07.Lucene搜索实战1 共4...

    Lucene 搜索方法(多短语搜索)

    1. **DemoData.java** - 这个文件很可能是包含测试数据或者示例数据的类,用于演示Lucene搜索功能。它可能包含了创建索引所需的文档对象,以及用于搜索的关键词。 2. **MultiPhraseQueryDemo.java** - 这个文件是多...

    [Lucene搜索引擎开发权威经典].于天恩著.扫描版.7z.002

    [Lucene搜索引擎开发权威经典].于天恩著.扫描版.7z.001[Lucene搜索引擎开发权威经典].于天恩著.扫描版.7z.001

    Solr Elasticsearch lucene 搜索引擎

    Solr、Elasticsearch和Lucene是三个在搜索引擎领域中至关重要的技术,它们共同构建了现代数据检索的基础架构。下面将分别对这三个组件进行详细解释,并探讨它们之间的关系。 **Lucene** Lucene是一个高性能、全文本...

    Lucene搜索-引擎开发权威经典pdf+源码

    Lucene搜索-引擎开发权威经典pdf+源码第一部分共2个

    ssh+lucene搜索实例

    在这个"ssh+lucene搜索实例"中,我们可以理解为结合了SSH和Lucene两个技术,以实现远程服务器上的全文检索功能。例如,可能有一个需求是在多个远程服务器上存储大量数据,而这些数据需要通过关键词进行快速搜索。在...

    Lucene搜索引擎开发权威经典随书资源7-10

    本书基于Lucene的当前最新版本(2.1)精解了Lucene搜索引擎的相关知识,从基础知识到应用开发,精炼简洁,恰到好处。  本书包含了必要的理论,但以实践为主。所讲的理论都不是纸上谈兵,都是可以立即付诸实践进行...

Global site tag (gtag.js) - Google Analytics