lucene 创建索引

dh189

浏览: 135937 次
性别:
来自: 上海

最近访客 更多访客>>

xejinqq

jijl2001

free0007

onetake

博主相关

博客

微博

相册

留言

关于我

文章分类

社区版块

存档分类

博客分类：

java

lucene Apache HTML

/**
 * Field-level marker that tells the index builder how a bean field should be
 * written into the search index. Retained at runtime so it can be read via
 * reflection.
 */
@Target(ElementType.FIELD)
@Retention(RetentionPolicy.RUNTIME)
public @interface IndexAnnotation {

	/** Whether the field value is stored in the index. Defaults to {@code false}. */
	boolean store() default false;

	/** Whether the field value is analysed (tokenized). Defaults to {@code false}. */
	boolean analyse() default false;

	/** Whether HTML markup is stripped from the value first. Defaults to {@code true}. */
	boolean parseHtml() default true;

	/** Boost (weight) applied to the field. Defaults to {@code 10}. */
	float boost() default 10f;
}

/**
 * Builds Lucene {@link Document}s from beans by reflecting over their declared
 * fields and applying the settings found on each field's
 * {@link IndexAnnotation}.
 *
 * <p>A field without an {@link IndexAnnotation} is treated as if it carried the
 * annotation with all of its default values.
 */
public final class IndexDocumentUtils {

	private static final Logger log = LoggerFactory
			.getLogger(IndexDocumentUtils.class);

	/** Matches anything shaped like a markup tag; DOTALL lets a tag span lines. */
	private static final Pattern tagPattern = Pattern
			.compile("<.*?>", Pattern.DOTALL);

	/**
	 * Creates an index document with one Lucene field per declared, non-static
	 * field of the given bean.
	 *
	 * @param idataIndex
	 *            the bean to index
	 * @return the populated document
	 */
	public static Document createDocument(IdataIndex idataIndex) {
		Class<?> clzss = idataIndex.getClass();
		Document doc = new Document();

		for (Field field : clzss.getDeclaredFields()) {
			// Static members (serialVersionUID, constants) are not bean data.
			if (java.lang.reflect.Modifier.isStatic(field.getModifiers())
					|| field.getName().equals("serialVersionUID")) {
				continue;
			}

			// Read the annotation once from the Field already in hand instead of
			// looking the field up by name again for every setting.
			IndexAnnotation ia = field.getAnnotation(IndexAnnotation.class);
			String name = field.getName();
			String value = getFieldValue(idataIndex, name, isParseHtml(ia));

			org.apache.lucene.document.Field indexField = new org.apache.lucene.document.Field(
					name, value, getStore(ia), getIndex(ia));

			// Apply the per-field weight.
			indexField.setBoost(getBoost(ia));
			doc.add(indexField);
		}

		return doc;
	}

	/**
	 * Reads a bean property as a string, optionally removing markup tags.
	 *
	 * @param idataIndex
	 *            the bean to read from
	 * @param fieldName
	 *            the property name
	 * @param parseHtml
	 *            whether tags matched by {@link #tagPattern} are removed
	 * @return the (possibly stripped) value; never {@code null}. An empty
	 *         string is returned when the value is missing or cannot be read.
	 */
	private static String getFieldValue(IdataIndex idataIndex,
			String fieldName, boolean parseHtml) {
		try {
			String value = StringUtil.defaultIfEmpty(BeanUtils.getProperty(
					idataIndex, fieldName));
			// Guard against null as well: the Lucene field needs a non-null value.
			if (value == null || value.length() == 0) {
				return "";
			}
			return parseHtml ? tagPattern.matcher(value).replaceAll("") : value;
		} catch (Exception e) {
			// Best effort: an unreadable property is indexed as empty rather than
			// aborting the whole document.
			log.error("Failed to read property '" + fieldName + "' of "
					+ idataIndex.getClass().getName(), e);
			return "";
		}
	}

	/**
	 * Decides whether the field value is stored in the index.
	 *
	 * @param ia
	 *            the field's annotation, or {@code null} if it has none
	 * @return {@code Store.YES} when {@code store} is set, otherwise
	 *         {@code Store.NO}
	 */
	private static Store getStore(IndexAnnotation ia) {
		if (ia != null && ia.store()) {
			return org.apache.lucene.document.Field.Store.YES;
		}
		return org.apache.lucene.document.Field.Store.NO;
	}

	/**
	 * Decides how the field value is indexed.
	 *
	 * <p>Previously every path returned {@code ANALYZED}, so the
	 * {@code analyse} flag had no effect. The flag is now honoured: fields that
	 * do not request analysis are indexed as {@code NOT_ANALYZED}.
	 *
	 * @param ia
	 *            the field's annotation, or {@code null} if it has none
	 * @return {@code Index.ANALYZED} when {@code analyse} is set, otherwise
	 *         {@code Index.NOT_ANALYZED}
	 */
	private static Index getIndex(IndexAnnotation ia) {
		if (ia != null && ia.analyse()) {
			return org.apache.lucene.document.Field.Index.ANALYZED;
		}
		return org.apache.lucene.document.Field.Index.NOT_ANALYZED;
	}

	/**
	 * Decides whether markup is stripped from the field value.
	 *
	 * @param ia
	 *            the field's annotation, or {@code null} if it has none
	 * @return the annotation's {@code parseHtml} value, or {@code true} when
	 *         the field is not annotated
	 */
	private static boolean isParseHtml(IndexAnnotation ia) {
		return ia == null || ia.parseHtml();
	}

	/**
	 * Returns the boost (weight) for the field.
	 *
	 * @param ia
	 *            the field's annotation, or {@code null} if it has none
	 * @return the annotation's {@code boost} value, or {@code 10} when the
	 *         field is not annotated
	 */
	private static float getBoost(IndexAnnotation ia) {
		return ia != null ? ia.boost() : 10;
	}
}

/**
 * Serializable bean holding the values of one search-index entry. Each field
 * carries an {@link IndexAnnotation} describing how it is indexed.
 */
public class SearchIndex implements Serializable {

	private static final long serialVersionUID = 153648837940506749L;

	/** Index entry number. */
	@IndexAnnotation(store = true)
	private String id;

	/** Resource ID. */
	@IndexAnnotation(store = true)
	private String resourceId;

	/** Title. */
	@IndexAnnotation(store = true, analyse = true, boost = 100)
	private String title;

	/** Descriptive content of the index entry. */
	@IndexAnnotation(store = true, analyse = true, boost = 50)
	private String content;

	/** @return the index entry number */
	public String getId() {
		return this.id;
	}

	/** @param id the index entry number */
	public void setId(final String id) {
		this.id = id;
	}

	/** @return the resource ID */
	public String getResourceId() {
		return this.resourceId;
	}

	/** @param resourceId the resource ID */
	public void setResourceId(final String resourceId) {
		this.resourceId = resourceId;
	}

	/** @return the title */
	public String getTitle() {
		return this.title;
	}

	/** @param title the title */
	public void setTitle(final String title) {
		this.title = title;
	}

	/** @return the descriptive content */
	public String getContent() {
		return this.content;
	}

	/** @param content the descriptive content */
	public void setContent(final String content) {
		this.content = content;
	}

	/**
	 * Renders all fields reflectively using the multi-line style.
	 */
	@Override
	public String toString() {
		return ToStringBuilder.reflectionToString(this,
				ToStringStyle.MULTI_LINE_STYLE);
	}
}

分享到：

jquery 回到顶部插件 | 反射工具类

2010-09-01 14:48
浏览 1065
评论(0)
分类:编程语言
查看更多

发表评论

您还没有登录,请您登录后再发表评论

最近访客 更多访客>>

博主相关

文章分类

社区版块

存档分类

最新评论

lucene 创建索引

评论

发表评论

相关推荐

最近访客 更多访客>>

博主相关

文章分类

社区版块

存档分类

最新评论

lucene 创建索引

评论

发表评论

相关推荐

Tomcat 启动不了问题

Properties类小结

反射工具类

url参数和map之间的转换

使用jackson解析json数据时获取多级节点的值，递归实现

将javabean转换成Map

httpclient4 ThreadSafeClientConnManager 多线程执行HTTP请求的例子

CalendarUtil

java 中对象属性和数据库中字段名的转换

使用HttpClient4 和jsoup下载Google 1998年到2010年的logo（原创）

GoogleLogoUtil

HttpClient4 实现文件下载

java httpClient4.0 通过代理认证访问网站

httpclient 获取到网页内容自动判断内容编码

java 生成32位UUID

java bean反射 获取get set方法(Method)

java 半角与全角转换

使用jackson解析json数据

java高清晰处理图片

java人民币小写转换大写

最近访客 更多访客>>

java bean反射获取get set方法(Method)