1 Introduction
In the TFS cluster, the nameserver runs in an HA architecture: a heart agent maintains a vip (virtual IP) for the nameserver, and clients and data nodes talk to that vip directly, so how many nameservers sit behind it, and which one is actually serving, is transparent to them. The nameserver's responsibilities are to maintain the location information of all blocks, the metadata, and the mapping between the two, which makes it a stateful node; it also monitors the status of every dataserver and dispatches tasks such as load balancing and migration. The nameserver is therefore the brain of the whole cluster and is critically important. A seamless, real-time switch between the master and slave nameservers requires two guarantees:
a During normal operation, the master nameserver and the slave nameserver must stay equivalent in state.
b During a failover, the switch from master to slave nameserver must be transparent to users.
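To make the transparency point concrete, the sketch below shows the client side of this arrangement: a client (or dataserver) only ever dials the vip, so whichever nameserver currently holds that address answers, and a failover behind it goes unnoticed. This is a minimal illustration, not TFS client code; the vip "10.0.0.100" and port 8108 in the usage comment are made-up example values.

// Minimal sketch (not TFS client code): the caller only knows the vip, never
// which physical nameserver is behind it.
#include <arpa/inet.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <unistd.h>
#include <cstdint>

int connect_to_nameserver_vip(const char* vip, uint16_t port)
{
  int fd = socket(AF_INET, SOCK_STREAM, 0);
  if (fd < 0)
    return -1;
  sockaddr_in addr = {};
  addr.sin_family = AF_INET;
  addr.sin_port = htons(port);
  inet_pton(AF_INET, vip, &addr.sin_addr);
  if (connect(fd, reinterpret_cast<sockaddr*>(&addr), sizeof(addr)) != 0)
  {
    close(fd);
    return -1;  // whichever nameserver currently holds the vip would have answered
  }
  return fd;
}

// usage (hypothetical values): int fd = connect_to_nameserver_vip("10.0.0.100", 8108);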
2 Code implementation and principle
a Transparent switching
NameServerHeartManager handles the heartbeat with the peer (slave) nameserver and carries out the switch between the master and slave roles. The switching logic is straightforward: the vip is simply remapped from the original master nameserver to the slave nameserver. The code is as follows:
// Runtime state shared by the master/slave nameservers
struct NsRuntimeGlobalInformation
{
  uint64_t heart_ip_port_;
  uint64_t owner_ip_port_;
  uint64_t peer_ip_port_;
  int64_t switch_time_;
  int64_t discard_newblk_safe_mode_time_;
  int64_t lease_id_;
  int64_t lease_expired_time_;
  int64_t startup_time_;
  uint32_t vip_;
  bool destroy_flag_;
  int8_t owner_role_;
  int8_t peer_role_;
  int8_t owner_status_;
  int8_t peer_status_;
};
// peer_ip_port_  : ip & port of the peer nameserver
// owner_ip_port_ : ip & port of the local nameserver
// vip_           : the virtual ip of the current master; by checking vip_ a node can tell its own role

// Heartbeat check loop
void NameServerHeartManager::check_()
{
  time_t now = 0;
  NsKeepAliveType keepalive_type_ = NS_KEEPALIVE_TYPE_LOGIN;
  int32_t sleep_time = SYSPARAM_NAMESERVER.heart_interval_ / 2;
  NsRuntimeGlobalInformation& ngi = GFactory::get_runtime_info();
  while (!ngi.is_destroyed())
  {
    now = Func::get_monotonic_time();
    ns_role_establish_(ngi, now);
    if (ngi.is_master())
    {
      ns_check_lease_expired_(ngi, now);
    }
    else
    {
      keepalive_(sleep_time, keepalive_type_, ngi, now);
      if (!ngi.has_valid_lease(now))
        keepalive_type_ = NS_KEEPALIVE_TYPE_LOGIN;
      else
        keepalive_type_ = NS_KEEPALIVE_TYPE_RENEW;
    }
    if (sleep_time <= 0)
    {
      sleep_time = SYSPARAM_NAMESERVER.heart_interval_ / 2;
      sleep_time = std::max(sleep_time, 1);
    }
    Func::sleep(sleep_time, ngi.destroy_flag_);
  }
  keepalive_type_ = NS_KEEPALIVE_TYPE_LOGOUT;
  keepalive_(sleep_time, keepalive_type_, ngi, now);
}

// If the vip has floated to the local ip, this node becomes the master
int NameServerHeartManager::ns_role_establish_(NsRuntimeGlobalInformation& ngi, const time_t now)
{
  if (check_vip_(ngi))  // vip is local ip
  {
    if (!ngi.is_master())  // currently slave, switch to master
      switch_role_salve_to_master_(ngi, now);
  }
  else
  {
    if (ngi.is_master())
      switch_role_master_to_slave_(ngi, now);
  }
  return TFS_SUCCESS;
}
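The excerpt above never shows check_vip_, but the role decision only needs to answer one question: is the vip currently bound to one of this machine's network interfaces? The sketch below is one plausible way to answer that with getifaddrs(); it is an assumption for illustration, not the actual TFS check_vip_ implementation.

// Hedged sketch: test whether a vip (IPv4, network byte order) is bound to a
// local interface.  Not the real TFS code, only the idea ns_role_establish_ relies on.
#include <ifaddrs.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <cstdint>

bool vip_is_local(uint32_t vip_network_order)
{
  ifaddrs* ifa_list = NULL;
  if (getifaddrs(&ifa_list) != 0)
    return false;
  bool found = false;
  for (ifaddrs* ifa = ifa_list; ifa != NULL && !found; ifa = ifa->ifa_next)
  {
    if (ifa->ifa_addr != NULL && ifa->ifa_addr->sa_family == AF_INET)
    {
      const sockaddr_in* sin = reinterpret_cast<const sockaddr_in*>(ifa->ifa_addr);
      found = (sin->sin_addr.s_addr == vip_network_order);
    }
  }
  freeifaddrs(ifa_list);
  return found;  // true => this node should act as master
}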
b State synchronization
After a switch, the cluster must keep serving requests without affecting user operations, which means the master nameserver's state must be mirrored onto the slave nameserver. OpLogSyncManager is responsible for sending every operation log of the master nameserver to the slave nameserver, and for receiving and replaying it there, keeping the two in sync. The code is as follows:
// Message class carrying an operation log to be replayed
class OpLogSyncMessage: public common::BasePacket
{
  bool alloc_;
  int32_t length_;
  char* data_;
};

// Parse the log message and, according to the operation type it carries,
// replay the corresponding operation on the slave nameserver
int OpLogSyncManager::replay_helper(const char* const data, const int64_t data_len, int64_t& pos, const time_t now)
{
  OpLogHeader header;
  int32_t ret = ((NULL != data) && (data_len - pos >= header.length())) ? TFS_SUCCESS : EXIT_PARAMETER_ERROR;
  if (TFS_SUCCESS == ret)
  {
    ret = header.deserialize(data, data_len, pos);
    if (TFS_SUCCESS == ret)
    {
      uint32_t crc = 0;
      crc = Func::crc(crc, (data + pos), header.length_);
      if (crc != header.crc_)
      {
        TBSYS_LOG(ERROR, "check crc: %u<>: %u error", header.crc_, crc);
        ret = EXIT_CHECK_CRC_ERROR;
      }
      else
      {
        int8_t type = header.type_;
        switch (type)
        {
          case OPLOG_TYPE_REPLICATE_MSG:
          case OPLOG_TYPE_COMPACT_MSG:
            ret = replay_helper_do_msg(type, data, data_len, pos);
            break;
          case OPLOG_TYPE_BLOCK_OP:
            ret = replay_helper_do_oplog(now, type, data, data_len, pos);
            break;
          default:
            TBSYS_LOG(WARN, "type: %d not found", type);
            ret = EXIT_PLAY_LOG_ERROR;
            break;
        }
      }
    }
  }
  return ret;
}
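For context, replay_helper validates the header fields (length_, type_, crc_) before dispatching, which implies the master packed each entry the same way before shipping it in an OpLogSyncMessage. The sketch below shows that packing pattern in its simplest form; the flat layout (length, type, crc, payload) and the stand-in checksum are assumptions for illustration, not the real OpLogHeader wire format or TFS's Func::crc.

// Hedged sketch of the sending side implied by replay_helper: wrap each
// operation in a header so the slave can validate it before replaying.
#include <cstdint>
#include <cstring>
#include <vector>

// Stand-in checksum for the sketch; TFS uses its own Func::crc.
static uint32_t sketch_crc(uint32_t crc, const char* data, int32_t len)
{
  for (int32_t i = 0; i < len; ++i)
    crc = (crc << 5) + crc + static_cast<unsigned char>(data[i]);
  return crc;
}

std::vector<char> pack_oplog_entry(int8_t type, const char* payload, int32_t payload_len)
{
  uint32_t crc = sketch_crc(0, payload, payload_len);   // same check the slave repeats in replay_helper
  std::vector<char> buf(sizeof(int32_t) + sizeof(int8_t) + sizeof(uint32_t) + payload_len);
  char* p = buf.data();
  memcpy(p, &payload_len, sizeof(payload_len));  p += sizeof(payload_len);   // length_
  memcpy(p, &type, sizeof(type));                p += sizeof(type);          // type_
  memcpy(p, &crc, sizeof(crc));                  p += sizeof(crc);           // crc_
  memcpy(p, payload, payload_len);                                           // operation body
  return buf;  // handed to the slave, which deserializes and replays it
}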
Source: http://www.cnblogs.com/gisorange/p/4905507.html