Mycat 数据分片--取模函数源码阅读

gaojingsong

浏览: 1202088 次
性别:
来自: 深圳

最近访客更多访客>>

boveysmith

zah5897

xckouy

lengyun3566

博主相关

博客

微博

相册

留言

关于我

文章分类

社区版块

存档分类

博客分类：

数据库-----MYSQL
Mycat中间件

Mycat 数据分片--取模函数源码阅读

Mycat 数据分片--取模函数源码阅读：

package io.mycat.route.function;

import java.math.BigInteger;

import java.util.ArrayList;

import java.util.HashMap;

import java.util.List;

import java.util.Map;

import io.mycat.config.model.rule.RuleAlgorithm;

/**

* number column partion by Mod operator

* if count is 10 then 0 to 0,21 to 1 (21 % 10 =1)

* @author wuzhih

public class PartitionByMod extends AbstractPartitionAlgorithm implements RuleAlgorithm {

private int count;

@Override

public void init() {

}

public void setCount(int count) {

this.count = count;

}

@Override

public Integer calculate(String columnValue) {

//columnValue = NumberParseUtil.eliminateQoute(columnValue);

try {

BigInteger bigNum = new BigInteger(columnValue).abs();

return (bigNum.mod(BigInteger.valueOf(count))).intValue();

} catch (NumberFormatException e){

throw new IllegalArgumentException(new StringBuilder().append("columnValue:").append(columnValue).append(" Please eliminate any quote and non number within it.").toString(),e);

}

@Override

public int getPartitionNum() {

int nPartition = this.count;

return nPartition;

}

private static void hashTest() {

PartitionByMod hash=new PartitionByMod();

//初始化机器节点的数量为11

hash.setCount(11);

hash.init();//空方法暂无实际意义

//数组用来统计每个节点存储的数据条数,数组下标从0开始编号,刚好和机器编号相同

int[] bucket=new int[hash.count];

//Map存储节点数据的片键,其中key为机器编号0-10,

// Value为分库分表对应的键值,例如以ID为主键的一系列集合

Map<Integer,List<Integer>> hashed=new HashMap<>();

int total=1000_0000;//数据量

int c=0;

for(int i=100_0000;i<total+100_0000;i++){//假设分片键从100万开始

c++;//计数器统计循环的次数

//核心思想 i%count 取模,取模之后的余数值范围为0-count-1,其值对应的就是机器的节点编号

//键值为0即刚好是count的整数倍

int h=hash.calculate(Integer.toString(i));

//计数器,统计落在节点h上的数据量

bucket[h]++;

//从Map中取出节点H中存储的片键集合容器List,第一次肯定为空

List<Integer> list=hashed.get(h);

if(list==null){

list=new ArrayList<>();

hashed.put(h, list);

}

//把落在节点H中的数据扔到集合容器里面

list.add(i);

}

System.out.println(c+" "+total);

double d=0;

c=0;

int idx=0;

System.out.println("index bucket ratio");

//bucket中的值就是数据落在每个节点的数量，下标就是机器编号

for(int i:bucket){

//变量d是一个分布率求和统计,i代表落在节点的数量,total代表数据总数

d+=i/(double)total;

//统计各个节点的数据总和

c+=i;

//输出：机器节点下标节点上数据总数节点中数据占总数的比例

System.out.println("机器编号:"+(idx++)+",数据总数: "+i+" ,占比例: "+(i/(double)total));

}

System.out.println("分布率求和:"+d+" 各节点数据总数即路由总数: "+c);

System.out.println("****************************************************");

//取出在节点0的数据进行重新分布

rehashTest(hashed.get(0));

}

private static void rehashTest(List<Integer> partition) {

PartitionByMod hash=new PartitionByMod();

//注意节点0上的数据都是11的整数倍,现在机器节点达到110,

//所以节点1上的数据重新路由会落在11,22,33,99上面

//机器编号为0---109

hash.count=110;//分片数

hash.init();

//新数组用来统计每个节点存储的数据条数,数组下标从0开始编号,刚好和机器编号相同

int[] bucket=new int[hash.count];

int total=partition.size();//数据量

int c=0;

for(int i:partition){//假设分片键从100万开始

c++;

int h=hash.calculate(Integer.toString(i));

bucket[h]++;

}

System.out.println(c+" "+total);

c=0;

int idx=0;

System.out.println("index bucket ratio");

for(int i:bucket){

c+=i;

System.out.println(idx+++" "+i+" "+(i/(double)total));

}

public static void main(String[] args) {

hashTest();

//PartitionByMod partitionByMod = new PartitionByMod();

//partitionByMod.count=8;

//partitionByMod.calculate("\"6\"");

//partitionByMod.calculate("\'6\'");

}

结果验证：

疑问：各个节点数据分布比例的总和为什么不是100%，而是99.9999%？这是因为计算机用二进制浮点数存储小数时存在舍入误差，多个比例值累加后，误差就体现在了总和上。

查看图片附件

0
顶

0
踩

分享到：

大数据之Apache-Kylin介绍 | 【jersey 模拟Http请求】

2016-12-01 22:47
浏览 1193
评论(0)
分类:开源软件
查看更多

发表评论

您还没有登录,请您登录后再发表评论

最近访客更多访客>>

博主相关

文章分类

社区版块

存档分类

最新评论