solr3.5集成paoding和位置搜索及对solrj的使用。

xiaofancn

浏览: 333943 次
性别:
来自: 北京

最近访客更多访客>>

kindhyh

xindunm

sky54521

hhhh8888

博主相关

博客

微博

相册

留言

关于我

文章分类

社区版块

存档分类

博客分类：

J2EE
分词

http://xiaofancn.iteye.com/blog/1424252

http://wiki.apache.org/solr/Solrj

http://hi.baidu.com/channing07/blog/item/cb840754a98fc7c9b645ae3e.html

* https://github.com/dsmiley/SOLR-2155 关于位置的搜索的增强库

* http://wiki.apache.org/solr/SpatialSearch#SOLR-2155

为了让paoding适合高版本的solr3.5，我重新更换了一下paoding编译的部分jar包。

这些jar是从 solr3.5 项目的 apache-tomcat-7.0.27\webapps\solr\WEB-INF\lib 目录中复制过来的。

添加新的分词类

package net.paoding.analysis.analyzer.solr;
import java.io.Reader;
import java.util.Map;
import net.paoding.analysis.analyzer.PaodingTokenizer;
import net.paoding.analysis.analyzer.TokenCollector;
import net.paoding.analysis.analyzer.impl.MaxWordLengthTokenCollector;
import net.paoding.analysis.analyzer.impl.MostWordsTokenCollector;
import net.paoding.analysis.knife.PaodingMaker;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.solr.analysis.BaseTokenizerFactory;

/**
 * Solr tokenizer factory that builds {@link PaodingTokenizer} instances.
 *
 * <p>The optional {@code mode} factory argument selects the token collector:
 * {@code most-words} (also {@code default}, or omitted) or {@code max-word-length}.
 * Matching of the argument is case-insensitive.
 */
public class ChineseTokenizerFactory  extends BaseTokenizerFactory {
	/**
	 * Mode name for "most words" segmentation; this is the default mode.
	 */
	public static final String MOST_WORDS_MODE = "most-words";
	/**
	 * Mode name for "maximum word length" segmentation.
	 */
	public static final String MAX_WORD_LENGTH_MODE = "max-word-length";

	// Start in the default mode so that create() works even when neither
	// init() nor setMode() has been called yet.
	private String mode = MOST_WORDS_MODE;

	/**
	 * Selects the segmentation mode.
	 *
	 * @param mode {@code null}, {@code "default"} or {@code "most-words"} for the
	 *             default mode, {@code "max-word-length"} for the alternative
	 * @throws IllegalArgumentException if {@code mode} is any other value
	 */
	public void setMode(String mode) {
		if (mode == null || MOST_WORDS_MODE.equalsIgnoreCase(mode)
				|| "default".equalsIgnoreCase(mode)) {
			this.mode = MOST_WORDS_MODE;
		} else if (MAX_WORD_LENGTH_MODE.equalsIgnoreCase(mode)) {
			this.mode = MAX_WORD_LENGTH_MODE;
		} else {
			throw new IllegalArgumentException(
					"不合法的分析器Mode                                                参数设置:"
							+ mode);
		}
	}

	/**
	 * Initialises the factory and applies the {@code mode} argument, if present.
	 *
	 * @param args factory arguments passed in by the caller
	 */
	@Override
	public void init(Map<String,String> args) {
		super.init(args);
		setMode(args.get("mode"));
	}

	/**
	 * Creates a Paoding tokenizer over the given input using the configured mode.
	 *
	 * @param input the text to tokenize
	 * @return a new {@link PaodingTokenizer}
	 */
	public Tokenizer create(Reader input) {
		return new PaodingTokenizer(input, PaodingMaker.make(),
				createTokenCollector());
	}

	/** Returns a fresh token collector matching the current mode. */
	private TokenCollector createTokenCollector() {
		if (MOST_WORDS_MODE.equals(mode)) {
			return new MostWordsTokenCollector();
		}
		if (MAX_WORD_LENGTH_MODE.equals(mode)) {
			return new MaxWordLengthTokenCollector();
		}
		// setMode() only ever stores one of the two constants above, so reaching
		// this point means the invariant was broken.
		throw new IllegalStateException("unexpected tokenizer mode: " + mode);
	}
}

编译后，复制paoding-analysis.jar到solr的lib包中

apache-tomcat-7.0.27\webapps\solr\WEB-INF\lib

apache-tomcat-7.0.27\webapps\solr\solr\conf\schema.xml

里面很多field的type是text_general

所以我们修改text_general类型的切词类，

将类型text_general中的

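索引和查询的切词类，换成我们的切词类，例如：

<tokenizer class="net.paoding.analysis.analyzer.solr.ChineseTokenizerFactory" mode="most-words"/>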

启动tomcat，确认启动成功。

在我们的maven,pom.xml文件中添加solrj

<!-- SolrJ client and Solr core, both limited to test scope. -->
<!-- NOTE(review): version 1.3.0 differs from the Solr 3.5 server used in this article; confirm this is intended. -->
<dependency>
	<groupId>org.apache.solr</groupId>
	<artifactId>solr-solrj</artifactId>
	<version>1.3.0</version>
	<type>jar</type>
	<scope>test</scope>
</dependency>
<dependency>
	<groupId>org.apache.solr</groupId>
	<artifactId>solr-core</artifactId>
	<version>1.3.0</version>
	<type>jar</type>
	<scope>test</scope>
</dependency>

测试代码，运行前请读下面的注意事项。

package com.snailteam.team.dao;

import java.io.IOException;
import java.net.MalformedURLException;
import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;

import javax.annotation.Resource;
import javax.persistence.EntityManager;
import javax.persistence.PersistenceContext;

import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrRequest.METHOD;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.CommonsHttpSolrServer;
import org.apache.solr.client.solrj.impl.XMLResponseParser;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrInputDocument;
import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.springframework.data.mongodb.core.MongoTemplate;
import org.springframework.test.context.ContextConfiguration;
import org.springframework.test.context.junit4.SpringJUnit4ClassRunner;

import com.snailteam.team.model.City;
import com.snailteam.team.model.Shop;
import com.snailteam.team.service.CityService;
import com.snailteam.team.service.ProductService;
import com.snailteam.team.service.ShopService;
import com.snailteam.team.service.UserService;

/**
 * Integration tests that index shops into a Solr server through SolrJ and run
 * a distance-sorted query against them.
 *
 * <p>The tests talk to the Solr instance at {@link #url} and use the Spring
 * context named in {@code @ContextConfiguration}.
 *
 * <p>Spatial search references:
 * https://github.com/dsmiley/SOLR-2155 and
 * http://wiki.apache.org/solr/SpatialSearch#SOLR-2155
 *
 * @author fansxnet
 */
@SuppressWarnings("restriction")
@ContextConfiguration(locations = { "classpath*:/META-INF/spring/applicationContext*.xml" })
@RunWith(SpringJUnit4ClassRunner.class)
public class UserServiceTest {

	/** Base URL of the Solr web application under test. */
	public static String url = "http://localhost:8080/solr/";
	/** SolrJ client; re-created before every test by {@link #before()}. */
	public static CommonsHttpSolrServer server;

	/**
	 * Creates the SolrJ client and switches it to the XML response format.
	 *
	 * @throws MalformedURLException if {@link #url} is not a valid URL
	 */
	@Before
	public void before() throws MalformedURLException {
		server = new CommonsHttpSolrServer(url);
		server.setParser(new XMLResponseParser());
	}

	@Resource
	public UserService userService;

	@PersistenceContext
	EntityManager em;

	@Resource
	ShopService shopService;

	@Resource
	CityService cityService;

	@Resource
	ProductService productService;

	@Resource
	MongoTemplate mongoTemplate;

	/** Placeholder test; intentionally empty. */
	@Test
	public void testTorecallAdIdList() {

	}

	/**
	 * Indexes every shop of the first city into Solr and commits.
	 *
	 * @throws SolrServerException if Solr rejects the update
	 * @throws IOException if communication with Solr fails
	 */
	@Test
	public void testSoleAdd() throws SolrServerException, IOException {
		City city = cityService.getAll().get(0);
		List<Shop> shops = shopService.getShops(city);
		Collection<SolrInputDocument> docs = new HashSet<SolrInputDocument>();
		for (Shop shop : shops) {
			SolrInputDocument doc = new SolrInputDocument();
			doc.addField("id", shop.getId());
			// Also fill the project-specific shopid field so that queries such as
			// "shopid:3153" find the document.
			doc.addField("shopid", shop.getId());
			// NOTE(review): shops without coordinates are indexed with ids only;
			// confirm whether name/address should be indexed for them as well.
			if (shop.getLatitude() != null && shop.getLongitude() != null) {
				doc.addField("shopname", shop.getName());
				doc.addField("addr", shop.getAddr());
				doc.addField("tel", shop.getTel());
				doc.addField("traff", shop.getTraff());
				// The value is written as "latitude,longitude".
				// Latitude ranges from -90 to 90, longitude from -180 to 180.
				doc.addField("shoploc",
						shop.getLatitude() + "," + shop.getLongitude());
			}
			docs.add(doc);
		}
		server.add(docs);
		server.commit();
	}

	/**
	 * Searches shops by name, ordered by distance from a fixed point.
	 *
	 * <p>Roughly equivalent to the HTTP request
	 * {@code select?fl=shopname,addr,tel,traff,shoploc&q=shopname:北京&sfield=shoploc&pt=39.904392,116.265033&sort=geodist() asc}.
	 *
	 * @throws SolrServerException if Solr rejects the query
	 * @throws IOException if communication with Solr fails
	 */
	@Test
	public void testSoleSearch() throws SolrServerException, IOException {
		SolrQuery query = new SolrQuery();
		query.setQuery("shopname:北京");
		query.setFacet(true);
		query.setFacetMinCount(1);
		query.setFacetLimit(8);

		// Spatial parameters:
		// sfield - the field holding the spatial point; it has to be the field
		//          the coordinates were indexed into, i.e. "shoploc"
		// pt     - reference point as "latitude,longitude"
		// sort   - order results by distance from pt
		query.set("sfield", "shoploc");
		query.set("pt", "39.904392,116.265033");
		query.set("sort", "geodist() asc");

		// setFields() writes the "fl" parameter, so it is the only place where
		// the returned fields are chosen.
		query.setFields("shopname", "addr", "tel", "traff", "shoploc");

		// The response parser is already configured in before().
		QueryResponse rsp = server.query(query, METHOD.POST);
		SolrDocumentList docs = rsp.getResults();
		for (SolrDocument solrDoc : docs) {
			System.out.println(solrDoc.getFieldValue("shopname") + "-"
					+ solrDoc.getFieldValue("addr") + "-"
					+ solrDoc.getFieldValue("shoploc") + "-");
		}
	}
}

注意：

为了更形象，我们在solr的schema.xml中定义与我们项目一致的field字段，新加如下内容：

<!-- Shop fields: the text attributes use text_general, shoploc uses the location type. -->
<field name="shopid" type="text_general" indexed="true" stored="true"/>
<field name="shopname" type="text_general" indexed="true" stored="true"/>
<field name="addr" type="text_general" indexed="true" stored="true"/>
<field name="tel" type="text_general" indexed="true" stored="true"/>
<field name="traff" type="text_general" indexed="true" stored="true"/>
<field name="shoploc" type="location" indexed="true" stored="true"/>

打开solr后台管理页面 http://localhost:8080/solr/admin/

shopid:3153

查看图片附件

分享到：

android listview的使用 | 海量数据之统计ip频率top

2012-04-28 02:12
浏览 2029
评论(0)
分类:企业架构
查看更多

发表评论

您还没有登录,请您登录后再发表评论

最近访客更多访客>>

博主相关

文章分类

社区版块

存档分类

最新评论