客户端接口
- public String create(final String path, byte data[], List<ACL> acl,
- CreateMode createMode)
- throws KeeperException, InterruptedException
- {
- final String clientPath = path;
- PathUtils.validatePath(clientPath, createMode.isSequential());
- final String serverPath = prependChroot(clientPath);
- //请求头
- RequestHeader h = new RequestHeader();
- h.setType(ZooDefs.OpCode.create);
- //请求体
- CreateRequest request = new CreateRequest();
- //CREATE请求需要server端响应
- CreateResponse response = new CreateResponse();
- request.setData(data);
- //node类型
- request.setFlags(createMode.toFlag());
- request.setPath(serverPath);
- if (acl != null && acl.size() == 0) {
- throw new KeeperException.InvalidACLException();
- }
- request.setAcl(acl);
- //同步提交
- ReplyHeader r = cnxn.submitRequest(h, request, response, null);
- //异常情况
- if (r.getErr() != 0) {
- throw KeeperException.create(KeeperException.Code.get(r.getErr()),
- clientPath);
- }
- //真实路径,对于SEQUENTIAL NODE后面会加上序号
- if (cnxn.chrootPath == null) {
- return response.getPath();
- } else {
- return response.getPath().substring(cnxn.chrootPath.length());
- }
- }
请求提交过程和之前的exists一样,都是通过sendthread写出去,都会进入pengding队列等待server端返回。
server端处理也是一样,变化的只是RequestProcessor的业务逻辑。也就是说transport层是通用的,变化的是上层的业务层。
server端执行处理链,PrepRequestProcessor
- switch (request.type) {
- case OpCode.create:
- //反序列化的对象
- CreateRequest createRequest = new CreateRequest();
- //zxid递增
- pRequest2Txn(request.type, zks.getNextZxid(), request, createRequest, true);
- break;
- ......
- request.zxid = zks.getZxid();
- nextProcessor.processRequest(request);
具体处理
- protected void pRequest2Txn(int type, long zxid, Request request, Record record, boolean deserialize)
- throws KeeperException, IOException, RequestProcessorException
- {
- //构造内部的事务头
- request.hdr = new TxnHeader(request.sessionId, request.cxid, zxid,
- zks.getTime(), type);
- switch (type) {
- case OpCode.create:
- //检查session是否还有效
- zks.sessionTracker.checkSession(request.sessionId, request.getOwner());
- CreateRequest createRequest = (CreateRequest)record;
- //反序列化
- if(deserialize)
- ByteBufferInputStream.byteBuffer2Record(request.request, createRequest);
- String path = createRequest.getPath();
- //路径规则检查
- int lastSlash = path.lastIndexOf('/');
- if (lastSlash == -1 || path.indexOf('\0') != -1 || failCreate) {
- LOG.info("Invalid path " + path + " with session 0x" +
- Long.toHexString(request.sessionId));
- throw new KeeperException.BadArgumentsException(path);
- }
- //权限去重
- List<ACL> listACL = removeDuplicates(createRequest.getAcl());
- if (!fixupACL(request.authInfo, listACL)) {
- throw new KeeperException.InvalidACLException(path);
- }
- String parentPath = path.substring(0, lastSlash);
- //父节点的ChangeRecord
- ChangeRecord parentRecord = getRecordForPath(parentPath);
- //权限校验
- checkACL(zks, parentRecord.acl, ZooDefs.Perms.CREATE,
- request.authInfo);
- int parentCVersion = parentRecord.stat.getCversion();
- CreateMode createMode =
- CreateMode.fromFlag(createRequest.getFlags());
- //SEQUENCE节点修改路径
- if (createMode.isSequential()) {
- path = path + String.format(Locale.ENGLISH, "%010d", parentCVersion);
- }
- try {
- PathUtils.validatePath(path);
- } catch(IllegalArgumentException ie) {
- LOG.info("Invalid path " + path + " with session 0x" +
- Long.toHexString(request.sessionId));
- throw new KeeperException.BadArgumentsException(path);
- }
- //当前节点ChangeRecord,一并校验节点是否已经存在
- try {
- if (getRecordForPath(path) != null) {
- throw new KeeperException.NodeExistsException(path);
- }
- } catch (KeeperException.NoNodeException e) {
- // ignore this one
- }
- //EPHEMERAL节点不允许创建子节点
- boolean ephemeralParent = parentRecord.stat.getEphemeralOwner() != 0;
- if (ephemeralParent) {
- throw new KeeperException.NoChildrenForEphemeralsException(path);
- }
- //递增Cversion
- int newCversion = parentRecord.stat.getCversion()+1;
- //构造事务体,后续会被写入log
- request.txn = new CreateTxn(path, createRequest.getData(),
- listACL,
- createMode.isEphemeral(), newCversion);
- StatPersisted s = new StatPersisted();
- //如果是临时节点,则owner为sessionId
- if (createMode.isEphemeral()) {
- s.setEphemeralOwner(request.sessionId);
- }
- parentRecord = parentRecord.duplicate(request.hdr.getZxid());
- //递增parent的child数
- parentRecord.childCount++;
- //递增parent的cversion
- parentRecord.stat.setCversion(newCversion);
- //添加changeRecord到冗余队列
- addChangeRecord(parentRecord);
- addChangeRecord(new ChangeRecord(request.hdr.getZxid(), path, s,
- 0, listACL));
- break;
- ......
SyncRequestProcessor处理,主要是log写入,和之前的分析类似,不赘述
FinalRequestProcessor处理,修改内存中的datatree结构
- switch (header.getType()) {
- case OpCode.create:
- CreateTxn createTxn = (CreateTxn) txn;
- rc.path = createTxn.getPath();
- createNode(
- createTxn.getPath(),
- createTxn.getData(),
- createTxn.getAcl(),
- createTxn.getEphemeral() ? header.getClientId() : 0,
- createTxn.getParentCVersion(),
- header.getZxid(), header.getTime());
- break;
public String createNode(String path, byte data[], List<ACL> acl,
- long ephemeralOwner, int parentCVersion, long zxid, long time)
- throws KeeperException.NoNodeException,
- KeeperException.NodeExistsException {
- //各种参数设置
- int lastSlash = path.lastIndexOf('/');
- String parentName = path.substring(0, lastSlash);
- String childName = path.substring(lastSlash + 1);
- StatPersisted stat = new StatPersisted();
- stat.setCtime(time);
- stat.setMtime(time);
- stat.setCzxid(zxid);
- stat.setMzxid(zxid);
- stat.setPzxid(zxid);
- stat.setVersion(0);
- stat.setAversion(0);
- stat.setEphemeralOwner(ephemeralOwner);
- DataNode parent = nodes.get(parentName);
- if (parent == null) {
- throw new KeeperException.NoNodeException();
- }
- //修改parent节点内容
- synchronized (parent) {
- Set<String> children = parent.getChildren();
- if (children != null) {
- if (children.contains(childName)) {
- throw new KeeperException.NodeExistsException();
- }
- }
- if (parentCVersion == -1) {
- parentCVersion = parent.stat.getCversion();
- parentCVersion++;
- }
- parent.stat.setCversion(parentCVersion);
- parent.stat.setPzxid(zxid);
- Long longval = convertAcls(acl);
- //添加目标节点
- DataNode child = new DataNode(parent, data, longval, stat);
- parent.addChild(childName);
- nodes.put(path, child);
- //添加ephemeral类型节点
- if (ephemeralOwner != 0) {
- HashSet<String> list = ephemerals.get(ephemeralOwner);
- if (list == null) {
- list = new HashSet<String>();
- ephemerals.put(ephemeralOwner, list);
- }
- synchronized (list) {
- list.add(path);
- }
- }
- }
- ......
- //dataWatches和childWatches事件触发
- dataWatches.triggerWatch(path, Event.EventType.NodeCreated);
- childWatches.triggerWatch(parentName.equals("") ? "/" : parentName,
- Event.EventType.NodeChildrenChanged);
- return path;
- }
事件触发过程
- public Set<Watcher> triggerWatch(String path, EventType type, Set<Watcher> supress) {
- WatchedEvent e = new WatchedEvent(type,
- KeeperState.SyncConnected, path);
- HashSet<Watcher> watchers;
- synchronized (this) {
- //拿节点对应的watcher列表,只通知一次
- watchers = watchTable.remove(path);
- .......
- for (Watcher w : watchers) {
- HashSet<String> paths = watch2Paths.get(w);
- if (paths != null) {
- paths.remove(path);
- }
- }
- }
- //挨个通知
- for (Watcher w : watchers) {
- if (supress != null && supress.contains(w)) {
- continue;
- }
- w.process(e);
- }
- return watchers;
- }
具体处理,NIOServerCncx
- synchronized public void process(WatchedEvent event) {
- //事件通知的响应头
- ReplyHeader h = new ReplyHeader(-1, -1L, 0);
- ......
- // Convert WatchedEvent to a type that can be sent over the wire
- WatcherEvent e = event.getWrapper();
- //发送响应
- sendResponse(h, e, "notification");
- }
具体发送:
- synchronized public void sendResponse(ReplyHeader h, Record r, String tag) {
- try {
- ByteArrayOutputStream baos = new ByteArrayOutputStream();
- // Make space for length
- BinaryOutputArchive bos = BinaryOutputArchive.getArchive(baos);
- try {
- //占位
- baos.write(fourBytes);
- //序列化
- bos.writeRecord(h, "header");
- if (r != null) {
- bos.writeRecord(r, tag);
- }
- baos.close();
- } catch (IOException e) {
- LOG.error("Error serializing response");
- }
- //转换成byte数组
- byte b[] = baos.toByteArray();
- ByteBuffer bb = ByteBuffer.wrap(b);
- //最后写入长度
- bb.putInt(b.length - 4).rewind();
- //通过channel异步写
- sendBuffer(bb);
- if (h.getXid() > 0) {
- synchronized(this){
- outstandingRequests--;
- }
- // check throttling
- synchronized (this.factory) {
- if (zkServer.getInProcess() < outstandingLimit
- || outstandingRequests < 1) {
- sk.selector().wakeup();
- enableRecv();
- }
- }
- }
- } catch(Exception e) {
- LOG.warn("Unexpected exception. Destruction averted.", e);
- }
- }
server端就处理完了,接下来client端SendThread收到响应
if (replyHdr.getXid() == -1) {
- // -1 means notification
- if (LOG.isDebugEnabled()) {
- LOG.debug("Got notification sessionid:0x"
- + Long.toHexString(sessionId));
- }
- //反序列化
- WatcherEvent event = new WatcherEvent();
- event.deserialize(bbia, "response");
- // convert from a server path to a client path
- //路径切换,如果client用了相对路径的话
- if (chrootPath != null) {
- String serverPath = event.getPath();
- if(serverPath.compareTo(chrootPath)==0)
- event.setPath("/");
- else if (serverPath.length() > chrootPath.length())
- event.setPath(serverPath.substring(chrootPath.length()));
- else {
- LOG.warn("Got server path " + event.getPath()
- + " which is too short for chroot path "
- + chrootPath);
- }
- }
- //通过eventThread派发事件,通知之前注册的watcher
- WatchedEvent we = new WatchedEvent(event);
- if (LOG.isDebugEnabled()) {
- LOG.debug("Got " + we + " for sessionid 0x"
- + Long.toHexString(sessionId));
- }
- eventThread.queueEvent( we );
- return;
- }
eventThread端处理和之前类似
- case NodeCreated:
- //通知dataWatcher,只一次
- synchronized (dataWatches) {
- addTo(dataWatches.remove(clientPath), result);
- }
- //通知existWatcher,只一次
- synchronized (existWatches) {
- addTo(existWatches.remove(clientPath), result);
- }
- break;
- if (event instanceof WatcherSetEventPair) {
- // each watcher will process the event
- WatcherSetEventPair pair = (WatcherSetEventPair) event;
- //挨个通知
- for (Watcher watcher : pair.watchers) {
- try {
- watcher.process(pair.event);
- } catch (Throwable t) {
- LOG.error("Error while calling watcher ", t);
- }
- }
- }
相关推荐
### 深入浅出 Zookeeper #### 一、引言与基础知识 ##### 自序 在深入了解Zookeeper之前,我们不妨先从一位实践者的视角出发。最初接触到Zookeeper时,很多人可能会感到困惑,尤其是当其与Kafka这样的分布式消息...
深入浅出Zookeeper核心原理_1
深入浅出Zookeeper核心原理_2
深入浅出Zookeeper核心原理_3
深入浅出Zookeeper核心原理_4
深入浅出Zookeeper核心原理_5
深入浅出Zookeeper核心原理_6
深入浅出Zookeeper核心原理_7
深入浅出Zookeeper核心原理_8
深入浅出Zookeeper核心原理_9
它使用了ZooKeeper自己的会话机制,以便处理客户端与服务端之间的连接和超时等状态。 ZooKeeper的数据模型和操作会实现ZooKeeper的协议,确保集群中的节点可以就数据的更新达成一致。这种一致性不是绝对的,而是...
Zookeeper 的使用场景广泛,例如在 Hadoop 中,它确保集群中只有一个 NameNode 来处理元数据,并存储配置信息。在 HBase 中,Zookeeper 用于监控 HMaster 的状态,确保集群的稳定,并管理 HRegionServer 的生命周期...
Zookeeper是Hadoop分布式调度服务,用来构建分布式应用系统。构建一个分布式应用是一个很复杂的...Zookeeper不能让部分失败的问题彻底消失,但是它提供了一些工具能够让你的分布式应用安全合理的处理部分失败的问题。
本文来自于技术世界,本文介绍了Zookeeper的架构,并组合实例分析了原子广播(ZAB)协议的原理,希望对您的学习有所帮助。Zookeeper是一个分布式协调服务,可用于服务发现,分布式锁,分布式领导选举,配置管理等。这...
在本文中,我们将深入探讨Zookeeper的服务监控与管理,以及如何有效地利用它来提升系统的稳定性和可扩展性。 一、Zookeeper的基本概念 1.1 ZooKeeper数据模型:Zookeeper的数据模型是一种树形结构,类似于文件系统...
zookeeper之分布式环境搭建:深入解析ZooKeeper分布式环境搭建+编程知识+技术开发; zookeeper之分布式环境搭建:深入解析ZooKeeper分布式环境搭建+编程知识+技术开发; zookeeper之分布式环境搭建:深入解析...
2. **数据版本控制**:每当数据发生改变时,ZooKeeper会记录修改的事务ID(mZxid)和数据版本(dataVersion),这有助于确保操作的一致性和原子性。 3. **版本控制**:在执行`set`或`delete`操作时,可以通过指定...
### ZooKeeper 未授权访问漏洞处理方法 #### 一、漏洞背景及原理 **ZooKeeper** 是一个分布式的协调服务框架,它提供了一种高效可靠的解决方案来管理和维护分布式环境中不同节点之间的同步与协调问题。然而,由于...
本文来自于技术世界,本文结合实例演示了使用Zookeeper实现分布式锁与领导选举的原理与具体实现方法。如上文《Zookeeper架构及FastLeaderElection机制》所述,Zookeeper提供了一个类似于Linux文件系统的树形结构。该...
在深入理解源码之前,我们需要先了解ZooKeeper的基本概念和工作原理。 **ZooKeeper的基本概念** 1. **节点(ZNode)**:ZooKeeper 的数据存储结构类似文件系统,由一系列的节点构成,每个节点称为ZNode。每个ZNode...