Lucene 4.3 检索、分词、搜索

leiyongping88

浏览: 77171 次
性别:
来自: 深圳

最近访客 更多访客>>

xlscutcs

youling0548

xx5333

juggerhoo

博主相关

博客

微博

相册

留言

关于我

文章分类

社区版块

存档分类

博客分类：

Lucene

Job 建索引:

/**
 * Job that rebuilds the "soft" Lucene index from the rows returned by
 * {@code SoftService}.
 *
 * <p>The index is built in a temporary directory ({@code softTmp}) and only
 * swapped over the live directory ({@code softIndex}) when the build finished
 * without errors, so a failed run never replaces a working index.
 */
public class SoftIndexJob{
    private Log log = LogFactory.getLog(SoftIndexJob.class);

    /** Number of rows requested from the service per page while indexing. */
    private static final int PAGE_ROWS = 20000;

    private String indexpath = SearchEngineCore.getIndexpath("VSOYOU_SOFT_INDEX_PATH"); // root directory that holds the index folders
    private String lastDate; // last-modified marker of the data that was indexed successfully


    /**
     * Rebuilds the index when the data's last-modified marker differs from the
     * one recorded by the previous successful run.
     *
     * <p>Failures are logged and leave both the live index and the recorded
     * marker untouched, so the next invocation tries again.
     */
    public void doUpdateIndexData(){
        SoftService softService = ServiceFactory.getBean(SoftService.class);
        String lastModify = softService.getSoftLastModify();
        if(StringUtils.isNotBlank(lastDate) && StringUtils.isNotBlank(lastModify) && lastModify.equals(lastDate)){
            return; // nothing changed since the last successful build
        }

        String tmpIndexPath = indexpath + File.separator + "softTmp"; // temporary index directory
        boolean indexBuilt = false;
        IndexWriter writer = null;
        try {
            FileUtil.deleteFile(tmpIndexPath); // start from an empty temporary directory
            writer = SearchEngineCore.getIndexWriter(tmpIndexPath);
            if(null == writer){
                log.error("Could not open an IndexWriter for " + tmpIndexPath + "; index not updated");
                return;
            }
            System.out.println("索引SoftIndex更新start");
            int start = 0;
            while (true) {
                List<Soft> list = softService.findSoftIndexRes(start, PAGE_ROWS);
                if(null == list || list.isEmpty()) break;
                for (Soft soft : list) {
                    writer.addDocument(toDocument(soft));
                }
                start += PAGE_ROWS;
            }
            writer.forceMerge(1);
            writer.commit();
            indexBuilt = true;
        } catch (Exception e) {
            log.error(e.getMessage(), e);
        } finally {
            if(null != writer){
                try {
                    if(indexBuilt){
                        writer.close();
                    }else{
                        // Discard the partial build instead of committing it.
                        writer.rollback();
                    }
                    Directory dir = SearchEngineCore.getWriteDirectory(tmpIndexPath);
                    if(null != dir && IndexWriter.isLocked(dir))
                        IndexWriter.unlock(dir);
                    System.out.println("索引SoftIndex更新end");
                } catch (IOException e) {
                    indexBuilt = false;
                    log.error(e.getMessage(), e);
                }
            }
        }

        if(!indexBuilt){
            return; // keep the live index and retry on the next run
        }
        String indexDir = indexpath + File.separator + "softIndex";
        SearchEngineCore.updateIndex(tmpIndexPath, indexDir);
        // Record the marker only now, so a failed build is not mistaken for an up-to-date index.
        lastDate = lastModify;
    }

    /** Converts one {@code Soft} row into the Lucene document stored in the index. */
    private Document toDocument(Soft soft) {
        Document doc = new Document();
        doc.add(new LongField("softId", soft.getSoftId(), Field.Store.YES));
        doc.add(new LongField("releaseId", soft.getReleaseId(), Field.Store.YES));
        doc.add(new TextField("softName", blankToEmpty(soft.getSoftName()), Field.Store.YES));
        doc.add(new StringField("iconPath", blankToEmpty(soft.getIconPath()), Field.Store.YES));
        doc.add(new IntField("stars", soft.getStars(), Field.Store.YES));
        doc.add(new LongField("fileSize", soft.getFileSize(), Field.Store.YES));
        // Guarded like the other string fields: a null value would make StringField throw.
        doc.add(new StringField("releaseDate", blankToEmpty(soft.getReleaseDate()), Field.Store.YES));
        doc.add(new StringField("versionName", blankToEmpty(soft.getVersionName()), Field.Store.YES));
        doc.add(new IntField("totalDownloads", soft.getTotalDownloads(), Field.Store.YES));
        doc.add(new TextField("runType", soft.getRunType()+"", Field.Store.YES));
        doc.add(new IntField("totalComemntCount", soft.getTotalComemntCount(), Field.Store.YES));
        doc.add(new IntField("freeUse", soft.isFreeUse() ? 1 : 0, Field.Store.YES));
        doc.add(new IntField("freeDownload", soft.isFreeDownload() ? 1 : 0, Field.Store.YES));
        doc.add(new IntField("softCurrency", soft.getSoftCurrency(), Field.Store.YES));
        doc.add(new LongField("versionCode", soft.getVersionCode(), Field.Store.YES));
        doc.add(new StringField("packageName", blankToEmpty(soft.getPackageName()), Field.Store.YES));
        return doc;
    }

    /** Returns {@code value} unless it is null or blank, in which case the empty string. */
    private static String blankToEmpty(String value) {
        return StringUtils.isNotBlank(value) ? value : "";
    }
}

import org.apache.lucene.search.Query;
import org.apache.lucene.search.Sort;

/**
 * Holder that pairs a Lucene query with the sort order to apply to its results.
 */
public class QuerySort {

    /** Query part of the pair; {@code null} until assigned. */
    public Query query;

    /** Sort part of the pair; {@code null} until assigned. */
    public Sort sort;

    /** Creates an empty pair whose fields are filled in afterwards. */
    public QuerySort() {
        this(null, null);
    }

    /**
     * Creates a pair from the given parts.
     *
     * @param query the query to store
     * @param sort the sort order to store
     */
    public QuerySort(Query query, Sort sort) {
        this.sort = sort;
        this.query = query;
    }
}

/**
 * Static helpers for opening Lucene writers/searchers on file-system index
 * directories and for swapping a freshly built index over the live one.
 */
public class SearchEngineCore {
    protected static Log log = LogFactory.getLog(SearchEngineCore.class);
    protected String indexpath = null; // index path
    private static Object lock_r= new Object();
    private static Object lock_w=new Object();

    public SearchEngineCore(){}


    /**
     * Opens an {@code IndexWriter} in CREATE mode on the given directory,
     * using {@code IKAnalyzer}. An existing write lock on the directory is
     * released first.
     *
     * @param indexDir path of the index directory
     * @return the writer, or {@code null} when it could not be opened
     *         (the failure is logged)
     * @throws IOException declared for callers; failures raised inside this
     *         method are caught and logged
     */
    public static IndexWriter getIndexWriter(String indexDir) throws IOException {
        IndexWriter indexWriter = null;
        try {
            synchronized(lock_w){
                Directory indexDirectory = getWriteDirectory(indexDir);
                IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_43, new IKAnalyzer());
                if(IndexWriter.isLocked(indexDirectory)){
                    IndexWriter.unlock(indexDirectory);
                }
                indexWriterConfig.setOpenMode(OpenMode.CREATE);
                indexWriter = new IndexWriter(indexDirectory,indexWriterConfig);
            }
        } catch (Exception e) {
            e.printStackTrace();
            log.error(e.getMessage(), e);
        }
        return indexWriter;
    }

    /**
     * Opens the given path as a Lucene {@code Directory}, creating the folder
     * (including missing parent folders) when it does not exist yet.
     *
     * @param indexDir path of the index directory
     * @return the directory, or {@code null} when it could not be opened
     */
    public static Directory getWriteDirectory(String indexDir) {
        Directory indexDirectory = null;
        try {
            File indexFile = new File(indexDir);
            if(!indexFile.exists()) {
                // mkdirs rather than mkdir: the parent folders may not exist on first use.
                indexFile.mkdirs();
            }
            indexDirectory = FSDirectory.open(indexFile);
        } catch (IOException e) {
            e.printStackTrace();
            log.error(e.getMessage(), e);
        }
        return indexDirectory;
    }

    /**
     * Opens a new reader on the given index directory and wraps it in an
     * {@code IndexSearcher}. Every call opens a new reader; closing it is the
     * caller's job.
     *
     * @param indexDir path of the index directory
     * @return the searcher, or {@code null} when the directory does not exist
     *         or the reader could not be opened
     */
    public static IndexSearcher getIndexSearcher(String indexDir){
        IndexSearcher indexSearcher = null;
        try {
            synchronized(lock_r){
                File indexFile = new File(indexDir);
                if(!indexFile.exists()) {
                    return null;
                }

                IndexReader reader = DirectoryReader.open(FSDirectory.open(indexFile));
                indexSearcher = new IndexSearcher(reader);
            }
        } catch (IOException e) {
            e.printStackTrace();
            log.error(e.getMessage(), e);
        }
        return indexSearcher;
    }

    /**
     * Replaces the index in {@code dstDir} with the one in {@code srcDir}.
     * The previous content of {@code dstDir} is kept as {@code dstDir + ".bak"}
     * (an older backup is deleted first).
     *
     * <p>When {@code srcDir} does not exist nothing is touched, so the live
     * index is never moved away without a replacement.
     *
     * @param srcDir directory holding the new index
     * @param dstDir directory to be replaced
     */
    public static void updateIndex(String srcDir, String dstDir) {
        File srcDirFile = new File(srcDir);
        if (!srcDirFile.exists()) {
            log.warn("skip index update, source directory missing: " + srcDir);
            return;
        }
        File dstDirFile = new File(dstDir);
        if (dstDirFile.exists()) {
            String dstBakDir = dstDir + ".bak";
            File dstBakDirFile = new File(dstBakDir);
            if (dstBakDirFile.exists()) {
                boolean flag = FileUtil.deleteFile(dstBakDirFile);
                log.info("delete " + dstBakDir + "====" + flag);
            }
            IndexUtil.renameFile(dstDir, dstBakDir);
        }
        IndexUtil.renameFile(srcDir, dstDir);
    }

    /**
     * Builds the index root path from the classpath root URL by substituting
     * the configured value for the {@code WEB-INF/classes} segment.
     *
     * @param idxPath name of the configuration property holding the
     *        replacement segment
     * @return the resulting path with forward slashes and without the
     *         {@code file:/} prefix
     */
    public static String getIndexpath(String idxPath) {
        String indexPath = Config.getProperty(idxPath);
        // Literal replace: the configured value must not be interpreted as a
        // regex replacement string ('$' and '\' are special there).
        // NOTE(review): removing "file:/" also drops the leading '/' of a Unix
        // path, yielding a relative path there — confirm the deployment target.
        return SearchEngineCore.class.getResource("/").toString()
            .replace("WEB-INF/classes", indexPath)
            .replace('\\', '/')
            .replace("file:/", "")
            .trim();

    }
}

/**
 * Serializable bean carrying the basic profile values of a user.
 * The login name is excluded from JSON serialization.
 */
public class UserInfo implements Serializable{
    private static final long serialVersionUID = -1341713350583127283L;

    private Long userId;
    private String headImg;
    private String nickName;
    @JSONField(serialize=false)
    private String loginName;
    private int sex=-1; // gender: 0 = male, 1 = female
    private int checkinCount=0; // number of check-ins
    private int favoriteCount=0; // number of favorites

    /** Creates an instance with the field defaults declared above. */
    public UserInfo(){}

    /** @return the user id */
    public Long getUserId() {
        return this.userId;
    }

    /** @param userId the user id */
    public void setUserId(Long userId) {
        this.userId = userId;
    }

    /** @return the head image value */
    public String getHeadImg() {
        return this.headImg;
    }

    /** @param headImg the head image value */
    public void setHeadImg(String headImg) {
        this.headImg = headImg;
    }

    /** @return the nick name */
    public String getNickName() {
        return this.nickName;
    }

    /** @param nickName the nick name */
    public void setNickName(String nickName) {
        this.nickName = nickName;
    }

    /** @return the login name */
    public String getLoginName() {
        return this.loginName;
    }

    /** @param loginName the login name */
    public void setLoginName(String loginName) {
        this.loginName = loginName;
    }

    /** @return the gender code */
    public int getSex() {
        return this.sex;
    }

    /** @param sex the gender code */
    public void setSex(int sex) {
        this.sex = sex;
    }

    /** @return the number of check-ins */
    public int getCheckinCount() {
        return this.checkinCount;
    }

    /** @param checkinCount the number of check-ins */
    public void setCheckinCount(int checkinCount) {
        this.checkinCount = checkinCount;
    }

    /** @return the number of favorites */
    public int getFavoriteCount() {
        return this.favoriteCount;
    }

    /** @param favoriteCount the number of favorites */
    public void setFavoriteCount(int favoriteCount) {
        this.favoriteCount = favoriteCount;
    }

}

索引搜索:

/**
 * Keyword search over the "soft" Lucene index.
 *
 * <p>Each search opens its own {@code IndexSearcher} and closes the underlying
 * reader when done, so concurrent requests do not share a searcher and no
 * reader is left open.
 */
public class SoftSearch extends BaseServiceImpl{

    private static String indexpath = SearchEngineCore.getIndexpath("VSOYOU_SOFT_INDEX_PATH"); // root directory that holds the index folders

    /** Small manual demo: runs one fixed search and prints the hit names. */
    public static void main(String[] args) throws ParseException {
        long startTime = System.currentTimeMillis();
        String searchWord ="QQ欢乐斗地主";
        searchWord = SearchUtil.wmlEncode(searchWord);
        searchWord = SearchUtil.traditionalToSimple(searchWord).trim(); // traditional -> simplified Chinese

        int page =1;
        int pageSize = 100;
        IndexSearcher searcher = openSearcher();
        try {
            TopDocs topDocs = search(searcher,searchWord,page,pageSize);
            // search() returns null when the index is missing or unreadable.
            System.out.println("总共命中数："+(topDocs == null ? 0 : topDocs.totalHits));
            if(topDocs != null && topDocs.totalHits !=0){
                ScoreDoc[] scoreDocs = topDocs.scoreDocs; // hits returned by the search
                int begin = (page - 1)*pageSize ; // first record of the page
                int end = Math.min(begin + pageSize, scoreDocs.length); // one past the last record of the page
                SoftSearch search = new SoftSearch();
                List<Soft> softs = search.addHits2List(searcher,scoreDocs,begin,end);
                for (int i = 0; i < softs.size(); i++) {
                    System.out.println(i+": \t "+softs.get(i).getSoftName());
                }
            }
        } finally {
            closeSearcher(searcher);
        }
        System.out.println("检索完成用时:"+(System.currentTimeMillis()-startTime)+"毫秒");
    }

    /** Opens a searcher on the live index directory; {@code null} when unavailable. */
    private static IndexSearcher openSearcher() {
        String indexDir = indexpath + File.separator + "softIndex";
        return SearchEngineCore.getIndexSearcher(indexDir);
    }

    /** Closes the reader behind the searcher; a {@code null} searcher is ignored. */
    private static void closeSearcher(IndexSearcher searcher) {
        if(null == searcher) return;
        try {
            searcher.getIndexReader().close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Runs the keyword query (parsed against {@code softName}) combined with
     * the fixed filter/sort from {@link #getKeywordQuerySort()} and fetches
     * the top {@code page*pageSize} hits.
     *
     * @return the hits, or {@code null} when the searcher is {@code null} or
     *         reading the index failed
     */
    private static TopDocs search(IndexSearcher searcher,String searchWord,int page,int pageSize) throws ParseException {
        if(null == searcher) return null;
        try {
            BooleanQuery allQuery = new BooleanQuery();
            // query built from the search keyword
            QueryParser parser = new QueryParser(Version.LUCENE_43,"softName",new IKAnalyzer());
            allQuery.add(parser.parse(searchWord), BooleanClause.Occur.MUST);

            QuerySort keywordQuerySort = getKeywordQuerySort();
            allQuery.add(keywordQuerySort.query,BooleanClause.Occur.MUST);

            return searcher.search(allQuery, page*pageSize, keywordQuerySort.sort);
        } catch (IOException e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Loads the stored documents for {@code scoreDocs[begin..end)} and maps
     * them to {@code Soft} objects. The comment count comes from
     * {@code commentsInfoDao} when it is non-negative, otherwise from the
     * value stored in the index. On an I/O error the items collected so far
     * are returned.
     */
    private List<Soft> addHits2List(IndexSearcher searcher,ScoreDoc[] scoreDocs,int begin,int end) {
        List<Soft> softs = new ArrayList<Soft>();
        try {
            this.commentsInfoDao = ServiceFactory.getBean(CommentsInfoDao.class);
            for (int i = begin; i < end; i++) {
                Document doc = searcher.doc(scoreDocs[i].doc);
                Long releaseId = Long.valueOf(doc.get("releaseId"));
                Soft soft = new Soft();
                if(StringUtils.isNotBlank(doc.get("fileSize")))
                    soft.setFileSize(Long.valueOf(doc.get("fileSize")));
                if(StringUtils.isNotBlank(doc.get("freeDownload")))
                    soft.setFreeDownload(doc.get("freeDownload").equals("1"));
                if(StringUtils.isNotBlank(doc.get("freeUse")))
                    soft.setFreeUse(doc.get("freeUse").equals("1"));
                soft.setIconPath(doc.get("iconPath"));
                soft.setReleaseDate(doc.get("releaseDate"));
                soft.setReleaseId(releaseId);
                soft.setSoftName(doc.get("softName"));
                if(StringUtils.isNotBlank(doc.get("stars")))
                    soft.setStars(Integer.valueOf(doc.get("stars")));

                int commentRowCount = NumberUtils.strToInt(commentsInfoDao.getSoftCommentRowCount(releaseId));
                if(commentRowCount >=0){
                    soft.setTotalComemntCount(commentRowCount);
                }else if(StringUtils.isNotBlank(doc.get("totalComemntCount"))){
                    soft.setTotalComemntCount(Integer.valueOf(doc.get("totalComemntCount")));
                }

                if(StringUtils.isNotBlank(doc.get("totalDownloads")))
                    soft.setTotalDownloads(Integer.valueOf(doc.get("totalDownloads")));
                soft.setVersionName(doc.get("versionName"));
                if(StringUtils.isNotBlank(doc.get("softCurrency")))
                    soft.setSoftCurrency(Integer.valueOf(doc.get("softCurrency")));
                if(StringUtils.isNotBlank(doc.get("versionCode")))
                    soft.setVersionCode(Long.valueOf(doc.get("versionCode")));
                if(StringUtils.isNotBlank(doc.get("packageName")))
                    soft.setPackageName(doc.get("packageName"));
                softs.add(soft);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        return softs;
    }

    /**
     * Builds the fixed filter ({@code runType} term "100") and the descending
     * sort on downloads, comment count, release date and free-download flag.
     */
    private static QuerySort getKeywordQuerySort() {
        QuerySort querySort = new QuerySort();
        querySort.query = new TermQuery(new Term("runType", "100"));
        querySort.sort = new Sort(new SortField[] {
                new SortField("totalDownloads", SortField.Type.INT, true),
                new SortField("totalComemntCount", SortField.Type.INT, true),
                new SortField("releaseDate", SortField.Type.STRING, true),
                new SortField("freeDownload", SortField.Type.INT, true)
            });
        return querySort;
    }

    /**
     * Searches the index for the keyword and returns one page of results.
     *
     * @param searchWord keyword entered by the user
     * @param page 1-based page number; values below 1 are treated as 1
     * @param pageSize number of items per page; values below 1 yield no results
     * @return a map with the image-domain entry, and on success
     *         {@code "pageCount"} and {@code "list"}; {@code "list"} is
     *         {@code null} when nothing was found
     */
    public Map<String, Object> searchKeyWord(String searchWord,int page,int pageSize) {
        Map<String, Object> map = new HashMap<String, Object>();
        map.put(Const.IMG_DOMAIN_KEY,Const.IMG_DOMAIN_VALUE);
        if(pageSize < 1){
            // A non-positive page size cannot produce a page (and would divide by zero below).
            map.put("list", null);
            return map;
        }
        int currentPage = Math.max(page, 1); // guards against a negative start offset
        IndexSearcher searcher = null;
        try {
            searchWord = SearchUtil.wmlEncode(searchWord);
            searchWord = SearchUtil.traditionalToSimple(searchWord).trim(); // traditional -> simplified Chinese
            searcher = openSearcher();
            TopDocs topDocs = search(searcher,searchWord,currentPage,pageSize);
            if(topDocs == null || topDocs.totalHits ==0){
                map.put("list", null);
                return map;
            }
            map.put("pageCount", getPageCount(topDocs.totalHits,pageSize));
            ScoreDoc[] scoreDocs = topDocs.scoreDocs; // hits returned by the search
            int begin = (currentPage - 1)*pageSize ; // first record of the page
            int end = Math.min(begin + pageSize, scoreDocs.length); // one past the last record of the page
            map.put("list", addHits2List(searcher,scoreDocs,begin,end));

            // The original source carried a commented-out block here that started a
            // SearchKeyWordThread for the keyword; that step remains disabled.
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            closeSearcher(searcher);
        }
        return map;
    }

    /** Number of pages needed for {@code rowCount} rows, never less than 1. */
    private int getPageCount(int rowCount, int pageSize) {
        int pageCount = rowCount / pageSize;
        if ((rowCount % pageSize) != 0) {
            pageCount++;
        }
        return pageCount == 0 ? 1 : pageCount;
    }


}

需要jar包：

IKAnalyzer2012FF_u1.jar

IKAnalyzer3.2.5Stable.jar

lucene-analyzers-common-4.3.0.jar

lucene-core-4.3.0.jar

lucene-highlighter-4.3.0.jar

lucene-memory-4.3.0.jar

lucene-queryparser-4.3.0.jar

分享到：

Java与Http协议(HttpURLConnection和HttpCl ... | redis 超全的操作

2013-08-09 11:07
浏览 1020
评论(0)
分类:开源软件
查看更多

发表评论

文章已被作者锁定，不允许评论。

最近访客 更多访客>>

博主相关

文章分类

社区版块

存档分类

最新评论