看了ceph的关于snap的一些代码(snap.cc、snaprealm.cc、snapserver.cc、snapmapper.cc),作了标注,在此记录。
snap.cc主要实现各数据结构的encode(序列化,即把数据结构表示为二进制流)和decode(反序列化,即从二进制流还原出数据结构),以便通过网络传输或保存在磁盘等存储介质上。
以下是snaprealm.h的一些结构体的声明和注释
1 struct SnapRealm { 2 // realm state 3 4 sr_t srnode; 5 6 // in-memory state 7 MDCache *mdcache; 8 CInode *inode; 9 10 bool open; // set to true once all past_parents are opened 11 SnapRealm *parent; 12 set<SnapRealm*> open_children; // active children that are currently open 13 map<inodeno_t,SnapRealm*> open_past_parents; // these are explicitly pinned. 14 15 // cache 16 snapid_t cached_seq; // max seq over self and all past+present parents. 17 snapid_t cached_last_created; // max last_created over all past+present parents 18 snapid_t cached_last_destroyed; 19 set<snapid_t> cached_snaps; 20 SnapContext cached_snap_context; 21 22 bufferlist cached_snap_trace; 23 24 elist<CInode*> inodes_with_caps; // for efficient realm splits 25 map<client_t, xlist<Capability*>* > client_caps; // to identify clients who need snap notifications
snaprealm.cc:
1 #include "SnapRealm.h" 2 #include "MDCache.h" 3 #include "MDS.h" 4 5 #include "messages/MClientSnap.h" 6 7 8 /* 9 * SnapRealm 10 */ 11 12 #define dout_subsys ceph_subsys_mds 13 #undef dout_prefix 14 #define dout_prefix _prefix(_dout, mdcache->mds->get_nodeid(), inode, srnode.seq, this) 15 static ostream& _prefix(std::ostream *_dout, int whoami, CInode *inode, 16 uint64_t seq, SnapRealm *realm) { 17 return *_dout << " mds." << whoami 18 << ".cache.snaprealm(" << inode->ino() 19 << " seq " << seq << " " << realm << ") "; 20 } 21 22 ostream& operator<<(ostream& out, const SnapRealm& realm) 23 { 24 out << "snaprealm(" << realm.inode->ino() 25 << " seq " << realm.srnode.seq 26 << " lc " << realm.srnode.last_created 27 << " cr " << realm.srnode.created; 28 if (realm.srnode.created != realm.srnode.current_parent_since) 29 out << " cps " << realm.srnode.current_parent_since; 30 out << " snaps=" << realm.srnode.snaps; 31 if (realm.srnode.past_parents.size()) { 32 out << " past_parents=("; 33 for (map<snapid_t, snaplink_t>::const_iterator p = realm.srnode.past_parents.begin(); 34 p != realm.srnode.past_parents.end(); 35 ++p) { 36 if (p != realm.srnode.past_parents.begin()) out << ","; 37 out << p->second.first << "-" << p->first 38 << "=" << p->second.ino; 39 } 40 out << ")"; 41 } 42 out << " " << &realm << ")"; 43 return out; 44 } 45 46 //添加parent到open_past_parents的map容器中 47 void SnapRealm::add_open_past_parent(SnapRealm *parent) 48 { 49 open_past_parents[parent->inode->ino()] = parent; 50 parent->inode->get(CInode::PIN_PASTSNAPPARENT); //标记为CInode::PIN_PASTSNAPPARENT 51 } 52 53 struct C_SR_RetryOpenParents : public MDSInternalContextBase { 54 SnapRealm *sr; 55 snapid_t first, last, parent_last; 56 inodeno_t parent; 57 MDSInternalContextBase* fin; 58 C_SR_RetryOpenParents(SnapRealm *s, snapid_t f, snapid_t l, snapid_t pl, 59 inodeno_t p, MDSInternalContextBase *c) : 60 sr(s), first(f), last(l), parent_last(pl), parent(p), fin(c) {} 61 MDS *get_mds() { return 
sr->mdcache->mds; } 62 void finish(int r) { 63 if (r < 0) 64 sr->_remove_missing_parent(parent_last, parent, r); 65 if (sr->_open_parents(fin, first, last)) 66 fin->complete(0); 67 } 68 }; 69 70 //删除找不到snapid的parent 71 void SnapRealm::_remove_missing_parent(snapid_t snapid, inodeno_t parent, int err) 72 { 73 map<snapid_t, snaplink_t>::iterator p = srnode.past_parents.find(snapid); 74 if (p != srnode.past_parents.end()) { 75 dout(10) << __func__ << " " << parent << " [" << p->second.first << "," 76 << p->first << "] errno " << err << dendl; 77 srnode.past_parents.erase(p); 78 } else { 79 dout(10) << __func__ << " " << parent << " not found" << dendl; 80 } 81 } 82 83 //判定parents是否open,返回true or false 84 bool SnapRealm::_open_parents(MDSInternalContextBase *finish, snapid_t first, snapid_t last) 85 { 86 dout(10) << "open_parents [" << first << "," << last << "]" << dendl; 87 if (open) 88 return true; 89 90 // make sure my current parents‘ parents are open... 91 if (parent) { 92 dout(10) << " current parent [" << srnode.current_parent_since << ",head] is " << *parent 93 << " on " << *parent->inode << dendl; 94 if (last >= srnode.current_parent_since && 95 !parent->_open_parents(finish, MAX(first, srnode.current_parent_since), last)) 96 return false; 97 } 98 99 // and my past parents too! 
100 assert(srnode.past_parents.size() >= open_past_parents.size()); 101 if (srnode.past_parents.size() > open_past_parents.size()) { 102 for (map<snapid_t, snaplink_t>::iterator p = srnode.past_parents.begin(); 103 p != srnode.past_parents.end(); 104 ++p) { 105 dout(10) << " past_parent [" << p->second.first << "," << p->first << "] is " 106 << p->second.ino << dendl; 107 CInode *parent = mdcache->get_inode(p->second.ino); 108 if (!parent) { 109 C_SR_RetryOpenParents *fin = new C_SR_RetryOpenParents(this, first, last, p->first, 110 p->second.ino, finish); 111 mdcache->open_ino(p->second.ino, mdcache->mds->mdsmap->get_metadata_pool(), fin); 112 return false; 113 } 114 assert(parent->snaprealm); // hmm! 115 if (!parent->snaprealm->_open_parents(finish, p->second.first, p->first)) 116 return false; 117 if (!open_past_parents.count(p->second.ino)) { 118 add_open_past_parent(parent->snaprealm); 119 } 120 } 121 } 122 123 open = true; 124 return true; 125 } 126 127 //判断从first到last的snap的past_parents是否open 128 bool SnapRealm::have_past_parents_open(snapid_t first, snapid_t last) 129 { 130 dout(10) << "have_past_parents_open [" << first << "," << last << "]" << dendl; 131 if (open) 132 return true; 133 134 for (map<snapid_t, snaplink_t>::iterator p = srnode.past_parents.lower_bound(first); 135 p != srnode.past_parents.end(); 136 ++p) { 137 if (p->second.first > last) 138 break; 139 dout(10) << " past parent [" << p->second.first << "," << p->first << "] was " 140 << p->second.ino << dendl; 141 if (open_past_parents.count(p->second.ino) == 0) { 142 dout(10) << " past parent " << p->second.ino << " is not open" << dendl; 143 return false; 144 } 145 if (!open_past_parents[p->second.ino]->have_past_parents_open(MAX(first, p->second.first), 146 MIN(last, p->first))) 147 return false; 148 } 149 150 open = true; 151 return true; 152 } 153 154 155 void SnapRealm::close_parents() 156 { 157 for (map<inodeno_t,SnapRealm*>::iterator p = open_past_parents.begin(); 158 p != 
open_past_parents.end(); 159 ++p) 160 p->second->inode->put(CInode::PIN_PASTSNAPPARENT); //解除标记CInode::PIN_PASTSNAPPARENT 161 open_past_parents.clear(); //清空 162 } 163 164 165 /* 166 * get list of snaps for this realm. we must include parents‘ snaps 167 * for the intervals during which they were our parent. 168 */ 169 //将realm中的snaps取出并存入第一个参数set容器中 170 void SnapRealm::build_snap_set(set<snapid_t> &s, 171 snapid_t& max_seq, snapid_t& max_last_created, snapid_t& max_last_destroyed, 172 snapid_t first, snapid_t last) 173 { 174 dout(10) << "build_snap_set [" << first << "," << last << "] on " << *this << dendl; 175 176 if (srnode.seq > max_seq) 177 max_seq = srnode.seq; 178 if (srnode.last_created > max_last_created) 179 max_last_created = srnode.last_created; 180 if (srnode.last_destroyed > max_last_destroyed) 181 max_last_destroyed = srnode.last_destroyed; 182 183 // include my snaps within interval [first,last] 184 for (map<snapid_t, SnapInfo>::iterator p = srnode.snaps.lower_bound(first); // first element >= first 185 p != srnode.snaps.end() && p->first <= last; 186 ++p) 187 s.insert(p->first); 188 189 // include snaps for parents during intervals that intersect [first,last] 190 for (map<snapid_t, snaplink_t>::iterator p = srnode.past_parents.lower_bound(first); 191 p != srnode.past_parents.end() && p->first >= first && p->second.first <= last; 192 ++p) { 193 CInode *oldparent = mdcache->get_inode(p->second.ino); 194 assert(oldparent); // call open_parents first! 
195 assert(oldparent->snaprealm); 196 oldparent->snaprealm->build_snap_set(s, max_seq, max_last_created, max_last_destroyed, 197 MAX(first, p->second.first), 198 MIN(last, p->first)); //递归,将past_parent的snapid加入set容器 199 } 200 if (srnode.current_parent_since <= last && parent) 201 parent->build_snap_set(s, max_seq, max_last_created, max_last_destroyed, 202 MAX(first, srnode.current_parent_since), last); //递归,将current_parent的snapid加入set容器 203 } 204 205 206 //检查cache中的数据结构并更新cached_snaps和cached_snap_trace 207 void SnapRealm::check_cache() 208 { 209 if (cached_seq >= srnode.seq) 210 return; 211 212 cached_snaps.clear(); 213 cached_snap_context.clear(); 214 215 cached_last_created = srnode.last_created; 216 cached_last_destroyed = srnode.last_destroyed; 217 cached_seq = srnode.seq; 218 build_snap_set(cached_snaps, cached_seq, cached_last_created, cached_last_destroyed, 219 0, CEPH_NOSNAP); 220 221 cached_snap_trace.clear(); 222 build_snap_trace(cached_snap_trace); 223 224 dout(10) << "check_cache rebuilt " << cached_snaps 225 << " seq " << srnode.seq 226 << " cached_seq " << cached_seq 227 << " cached_last_created " << cached_last_created 228 << " cached_last_destroyed " << cached_last_destroyed 229 << ")" << dendl; 230 } 231 232 233 const set<snapid_t>& SnapRealm::get_snaps() 234 { 235 check_cache(); 236 dout(10) << "get_snaps " << cached_snaps 237 << " (seq " << srnode.seq << " cached_seq " << cached_seq << ")" 238 << dendl; 239 return cached_snaps; 240 } 241 242 /* 243 * build vector in reverse sorted order 244 */ 245 //循环将存储snapid的set中的内容放入返回值cached_snap_context中 246 const SnapContext& SnapRealm::get_snap_context() 247 { 248 check_cache(); 249 250 if (!cached_snap_context.seq) { 251 cached_snap_context.seq = cached_seq; 252 cached_snap_context.snaps.resize(cached_snaps.size()); 253 unsigned i = 0; 254 for (set<snapid_t>::reverse_iterator p = cached_snaps.rbegin(); 255 p != cached_snaps.rend(); 256 ++p) 257 cached_snap_context.snaps[i++] = *p; 258 } 259 260 return 
cached_snap_context; 261 } 262 263 //得到cache_snap,并将其info存入infomap的map容器中 264 void SnapRealm::get_snap_info(map<snapid_t,SnapInfo*>& infomap, snapid_t first, snapid_t last) 265 { 266 const set<snapid_t>& snaps = get_snaps(); 267 dout(10) << "get_snap_info snaps " << snaps << dendl; 268 269 // include my snaps within interval [first,last] 270 for (map<snapid_t, SnapInfo>::iterator p = srnode.snaps.lower_bound(first); // first element >= first 271 p != srnode.snaps.end() && p->first <= last; 272 ++p) 273 infomap[p->first] = &p->second; //令键为p->first的infomap的值等于&p->second 274 275 // include snaps for parents during intervals that intersect [first,last] 276 for (map<snapid_t, snaplink_t>::iterator p = srnode.past_parents.lower_bound(first); 277 p != srnode.past_parents.end() && p->first >= first && p->second.first <= last; 278 ++p) { 279 CInode *oldparent = mdcache->get_inode(p->second.ino); 280 assert(oldparent); // call open_parents first! 281 assert(oldparent->snaprealm); 282 oldparent->snaprealm->get_snap_info(infomap, 283 MAX(first, p->second.first), 284 MIN(last, p->first)); 285 } 286 if (srnode.current_parent_since <= last && parent) 287 parent->get_snap_info(infomap, MAX(first, srnode.current_parent_since), last); 288 } 289 290 const string& SnapRealm::get_snapname(snapid_t snapid, inodeno_t atino) 291 { 292 if (srnode.snaps.count(snapid)) { 293 if (atino == inode->ino()) 294 return srnode.snaps[snapid].name; 295 else 296 return srnode.snaps[snapid].get_long_name(); 297 } 298 299 map<snapid_t,snaplink_t>::iterator p = srnode.past_parents.lower_bound(snapid); 300 if (p != srnode.past_parents.end() && p->second.first <= snapid) { 301 CInode *oldparent = mdcache->get_inode(p->second.ino); 302 assert(oldparent); // call open_parents first! 
303 assert(oldparent->snaprealm); 304 return oldparent->snaprealm->get_snapname(snapid, atino); 305 } 306 307 assert(srnode.current_parent_since <= snapid); 308 assert(parent); 309 return parent->get_snapname(snapid, atino); 310 } 311 312 //判断第一个参数n,在srnode.snaps从first到last范围内找出对应的snapid 313 snapid_t SnapRealm::resolve_snapname(const string& n, inodeno_t atino, snapid_t first, snapid_t last) 314 { 315 // first try me 316 dout(10) << "resolve_snapname ‘" << n << "‘ in [" << first << "," << last << "]" << dendl; 317 318 //snapid_t num; 319 //if (n[0] == ‘~‘) num = atoll(n.c_str()+1); 320 321 bool actual = (atino == inode->ino()); 322 string pname; 323 inodeno_t pino; 324 if (!actual) { 325 if (!n.length() || 326 n[0] != ‘_‘) return 0; 327 int next_ = n.find(‘_‘, 1); 328 if (next_ < 0) return 0; 329 pname = n.substr(1, next_ - 1); 330 pino = atoll(n.c_str() + next_ + 1); 331 dout(10) << " " << n << " parses to name ‘" << pname << "‘ dirino " << pino << dendl; 332 } 333 334 //根据snapinfo得到snapid 335 for (map<snapid_t, SnapInfo>::iterator p = srnode.snaps.lower_bound(first); // first element >= first 336 p != srnode.snaps.end() && p->first <= last; 337 ++p) { 338 dout(15) << " ? " << p->second << dendl; 339 //if (num && p->second.snapid == num) 340 //return p->first; 341 if (actual && p->second.name == n) 342 return p->first; 343 if (!actual && p->second.name == pname && p->second.ino == pino) 344 return p->first; 345 } 346 347 // include snaps for parents during intervals that intersect [first,last] 348 for (map<snapid_t, snaplink_t>::iterator p = srnode.past_parents.lower_bound(first); 349 p != srnode.past_parents.end() && p->first >= first && p->second.first <= last; 350 ++p) { 351 CInode *oldparent = mdcache->get_inode(p->second.ino); 352 assert(oldparent); // call open_parents first! 
353 assert(oldparent->snaprealm); 354 snapid_t r = oldparent->snaprealm->resolve_snapname(n, atino, 355 MAX(first, p->second.first), 356 MIN(last, p->first)); 357 if (r) 358 return r; 359 } 360 if (parent && srnode.current_parent_since <= last) 361 return parent->resolve_snapname(n, atino, MAX(first, srnode.current_parent_since), last); 362 return 0; 363 } 364 365 366 //调整parent令变量parent等于当前parent 367 void SnapRealm::adjust_parent() 368 { 369 SnapRealm *newparent = inode->get_parent_dn()->get_dir()->get_inode()->find_snaprealm(); 370 if (newparent != parent) { 371 dout(10) << "adjust_parent " << parent << " -> " << newparent << dendl; 372 if (parent) 373 parent->open_children.erase(this); 374 parent = newparent; 375 if (parent) 376 parent->open_children.insert(this); 377 378 invalidate_cached_snaps(); 379 } 380 } 381 382 //将child分离出当前realm 383 void SnapRealm::split_at(SnapRealm *child) 384 { 385 dout(10) << "split_at " << *child 386 << " on " << *child->inode << dendl; 387 388 if (inode->is_mdsdir() || !child->inode->is_dir()) { 389 // it‘s not a dir. 390 if (child->inode->containing_realm) { 391 // - no open children. 392 // - only need to move this child‘s inode‘s caps. 393 child->inode->move_to_realm(child); //脱离containing_realm,加入到child realm 394 } else { 395 // no caps, nothing to move/split. 396 dout(20) << " split no-op, no caps to move on file " << *child->inode << dendl; 397 assert(!child->inode->is_any_caps()); 398 } 399 return; 400 } 401 402 // it‘s a dir. 
403 404 // split open_children 405 dout(10) << " open_children are " << open_children << dendl; 406 for (set<SnapRealm*>::iterator p = open_children.begin(); 407 p != open_children.end(); ) { 408 SnapRealm *realm = *p; 409 if (realm != child && 410 child->inode->is_projected_ancestor_of(realm->inode)) { 411 dout(20) << " child gets child realm " << *realm << " on " << *realm->inode << dendl; 412 realm->parent = child; 413 child->open_children.insert(realm); 414 open_children.erase(p++); 415 } else { 416 dout(20) << " keeping child realm " << *realm << " on " << *realm->inode << dendl; 417 ++p; 418 } 419 } 420 421 // split inodes_with_caps 422 elist<CInode*>::iterator p = inodes_with_caps.begin(member_offset(CInode, item_caps)); 423 while (!p.end()) { 424 CInode *in = *p; 425 ++p; 426 427 // does inode fall within the child realm? 428 bool under_child = false; 429 430 if (in == child->inode) { 431 under_child = true; 432 } else { 433 CInode *t = in; 434 while (t->get_parent_dn()) { 435 t = t->get_parent_dn()->get_dir()->get_inode(); 436 if (t == child->inode) { 437 under_child = true; 438 break; 439 } 440 if (t == in) 441 break; 442 } 443 } 444 if (under_child) { 445 dout(20) << " child gets " << *in << dendl; 446 in->move_to_realm(child); //脱离containing_realm,加入到child realm 447 } else { 448 dout(20) << " keeping " << *in << dendl; 449 } 450 } 451 452 } 453 454 const bufferlist& SnapRealm::get_snap_trace() 455 { 456 check_cache(); 457 return cached_snap_trace; 458 } 459 460 void SnapRealm::build_snap_trace(bufferlist& snapbl) //将snaps和所有parents存储在snapbl中 461 { 462 SnapRealmInfo info(inode->ino(), srnode.created, srnode.seq, srnode.current_parent_since); 463 464 if (parent) { 465 info.h.parent = parent->inode->ino(); 466 if (!srnode.past_parents.empty()) { 467 snapid_t last = srnode.past_parents.rbegin()->first; 468 set<snapid_t> past; 469 snapid_t max_seq, max_last_created, max_last_destroyed; 470 build_snap_set(past, max_seq, max_last_created, max_last_destroyed, 
0, last); 471 info.prior_parent_snaps.reserve(past.size()); 472 for (set<snapid_t>::reverse_iterator p = past.rbegin(); p != past.rend(); ++p) 473 info.prior_parent_snaps.push_back(*p); 474 dout(10) << "build_snap_trace prior_parent_snaps from [1," << last << "] " 475 << info.prior_parent_snaps << dendl; 476 } 477 } else 478 info.h.parent = 0; 479 480 info.my_snaps.reserve(srnode.snaps.size()); 481 for (map<snapid_t,SnapInfo>::reverse_iterator p = srnode.snaps.rbegin(); 482 p != srnode.snaps.rend(); 483 ++p) 484 info.my_snaps.push_back(p->first); 485 dout(10) << "build_snap_trace my_snaps " << info.my_snaps << dendl; 486 487 ::encode(info, snapbl); 488 489 if (parent) 490 parent->build_snap_trace(snapbl); 491 } 492 493 494 //删除past_parents中有但cached_snaps中没有的 495 void SnapRealm::prune_past_parents() 496 { 497 dout(10) << "prune_past_parents" << dendl; 498 check_cache(); 499 assert(open); 500 501 map<snapid_t, snaplink_t>::iterator p = srnode.past_parents.begin(); 502 while (p != srnode.past_parents.end()) { 503 set<snapid_t>::iterator q = cached_snaps.lower_bound(p->second.first); 504 if (q == cached_snaps.end() || //若cached_snaps中没有p指向的值则erase 505 *q > p->first) { 506 dout(10) << "prune_past_parents pruning [" << p->second.first << "," << p->first 507 << "] " << p->second.ino << dendl; 508 srnode.past_parents.erase(p++); 509 } else { 510 dout(10) << "prune_past_parents keeping [" << p->second.first << "," << p->first 511 << "] " << p->second.ino << dendl; 512 ++p; 513 } 514 } 515 }
snapserver.cc:
1 #include "SnapServer.h" 2 #include "MDS.h" 3 #include "osd/OSDMap.h" 4 #include "osdc/Objecter.h" 5 #include "mon/MonClient.h" 6 7 #include "include/types.h" 8 #include "messages/MMDSTableRequest.h" 9 #include "messages/MRemoveSnaps.h" 10 11 #include "msg/Messenger.h" 12 13 #include "common/config.h" 14 #include "include/assert.h" 15 16 #define dout_subsys ceph_subsys_mds 17 #undef dout_prefix 18 #define dout_prefix *_dout << "mds." << rank << ".snap " 19 20 21 void SnapServer::reset_state() //重置状态,将snaps和need_to_purge中的内容清空 22 { 23 last_snap = 1; /* snapid 1 reserved for initial root snaprealm */ 24 snaps.clear(); 25 need_to_purge.clear(); 26 27 // find any removed snapshot in data pools 28 snapid_t first_free = 0; 29 const OSDMap *osdmap = mds->objecter->get_osdmap_read(); 30 for (set<int64_t>::const_iterator p = mds->mdsmap->get_data_pools().begin(); 31 p != mds->mdsmap->get_data_pools().end(); 32 ++p) { 33 const pg_pool_t *pi = osdmap->get_pg_pool(*p); 34 if (!pi->removed_snaps.empty() && 35 pi->removed_snaps.range_end() > first_free) 36 first_free = pi->removed_snaps.range_end(); 37 } 38 mds->objecter->put_osdmap_read(); 39 if (first_free > last_snap) 40 last_snap = first_free; //扩展last_snap范围 41 } 42 43 44 // SERVER 45 //根据操作执行:创建-将bl中的内容加入到pending_creat中,销毁-将bl中的内容加入到pending_destroy中 46 void SnapServer::_prepare(bufferlist &bl, uint64_t reqid, mds_rank_t bymds) 47 { 48 bufferlist::iterator p = bl.begin(); 49 __u32 op; 50 ::decode(op, p); 51 52 switch (op) { 53 case TABLE_OP_CREATE: 54 { 55 version++; 56 57 SnapInfo info; 58 ::decode(info.ino, p); 59 if (!p.end()) { 60 ::decode(info.name, p); 61 ::decode(info.stamp, p); 62 info.snapid = ++last_snap; 63 pending_create[version] = info; 64 dout(10) << "prepare v" << version << " create " << info << dendl; 65 } else { 66 pending_noop.insert(version); 67 dout(10) << "prepare v" << version << " noop" << dendl; 68 } 69 bl.clear(); 70 ::encode(last_snap, bl); 71 } 72 break; 73 74 case TABLE_OP_DESTROY: 75 { 76 
inodeno_t ino; 77 snapid_t snapid; 78 ::decode(ino, p); // not used, currently. 79 ::decode(snapid, p); 80 version++; 81 82 // bump last_snap... we use it as a version value on the snaprealm. 83 ++last_snap; 84 85 pending_destroy[version] = pair<snapid_t,snapid_t>(snapid, last_snap); 86 dout(10) << "prepare v" << version << " destroy " << snapid << " seq " << last_snap << dendl; 87 88 bl.clear(); 89 ::encode(last_snap, bl); 90 } 91 break; 92 93 default: 94 assert(0); 95 } 96 //dump(); 97 } 98 99 //返回容器中此id出现的次数 100 bool SnapServer::_is_prepared(version_t tid) 101 { 102 return 103 pending_create.count(tid) || 104 pending_destroy.count(tid); 105 } 106 107 //将操作pending_create或pending_destroy的内容存入对应的结构中 108 bool SnapServer::_commit(version_t tid, MMDSTableRequest *req) 109 { 110 if (pending_create.count(tid)) { //若此tid在pending_creat中有值,清空并将此结构中对应snapid中的内容存入snaps中 111 dout(7) << "commit " << tid << " create " << pending_create[tid] << dendl; 112 snaps[pending_create[tid].snapid] = pending_create[tid]; 113 pending_create.erase(tid); 114 } 115 116 else if (pending_destroy.count(tid)) { //若此tid在pending_creat中有值,清空并将对应内容加入到need_to_purge 117 snapid_t sn = pending_destroy[tid].first; //removed_snap 118 snapid_t seq = pending_destroy[tid].second; //seq 119 dout(7) << "commit " << tid << " destroy " << sn << " seq " << seq << dendl; 120 snaps.erase(sn); 121 122 for (set<int64_t>::const_iterator p = mds->mdsmap->get_data_pools().begin(); 123 p != mds->mdsmap->get_data_pools().end(); 124 ++p) { 125 need_to_purge[*p].insert(sn); 126 need_to_purge[*p].insert(seq); 127 } 128 129 pending_destroy.erase(tid); 130 } 131 else if (pending_noop.count(tid)) { //清空pending_noop 132 dout(7) << "commit " << tid << " noop" << dendl; 133 pending_noop.erase(tid); 134 } 135 else 136 assert(0); 137 138 // bump version. 
139 version++; 140 //dump(); 141 return true; 142 } 143 144 //输出要执行的回滚命令并清空pending_create等结构中的内容,不执行具体操作 145 void SnapServer::_rollback(version_t tid) 146 { 147 if (pending_create.count(tid)) { 148 dout(7) << "rollback " << tid << " create " << pending_create[tid] << dendl; 149 pending_create.erase(tid); 150 } 151 152 else if (pending_destroy.count(tid)) { 153 dout(7) << "rollback " << tid << " destroy " << pending_destroy[tid] << dendl; 154 pending_destroy.erase(tid); 155 } 156 157 else if (pending_noop.count(tid)) { 158 dout(7) << "rollback " << tid << " noop" << dendl; 159 pending_noop.erase(tid); 160 } 161 162 else 163 assert(0); 164 165 // bump version. 166 version++; 167 //dump(); 168 } 169 170 //将传入参数bl中的项在need_to_purge的对应项擦除 171 void SnapServer::_server_update(bufferlist& bl) 172 { 173 bufferlist::iterator p = bl.begin(); 174 map<int, vector<snapid_t> > purge; 175 ::decode(purge, p); 176 177 dout(7) << "_server_update purged " << purge << dendl; 178 for (map<int, vector<snapid_t> >::iterator p = purge.begin(); 179 p != purge.end(); 180 ++p) { 181 for (vector<snapid_t>::iterator q = p->second.begin(); 182 q != p->second.end(); 183 ++q) 184 need_to_purge[p->first].erase(*q); 185 if (need_to_purge[p->first].empty()) 186 need_to_purge.erase(p->first); 187 } 188 189 version++; 190 } 191 192 void SnapServer::handle_query(MMDSTableRequest *req) 193 { 194 req->put(); 195 } 196 197 198 //遍历need_to_purge,执行remove的操作 199 void SnapServer::check_osd_map(bool force) 200 { 201 if (!force && version == last_checked_osdmap) { 202 dout(10) << "check_osd_map - version unchanged" << dendl; 203 return; 204 } 205 dout(10) << "check_osd_map need_to_purge=" << need_to_purge << dendl; 206 207 map<int, vector<snapid_t> > all_purge; 208 map<int, vector<snapid_t> > all_purged; 209 210 const OSDMap *osdmap = mds->objecter->get_osdmap_read(); 211 for (map<int, set<snapid_t> >::iterator p = need_to_purge.begin(); //遍历need_to_purge 212 p != need_to_purge.end(); 213 ++p) { 214 int id = 
p->first; 215 const pg_pool_t *pi = osdmap->get_pg_pool(id); 216 for (set<snapid_t>::iterator q = p->second.begin(); 217 q != p->second.end(); 218 ++q) { 219 if (pi->is_removed_snap(*q)) { //已经remove,存入all_purged,还没有remove则存入all_purge 220 dout(10) << " osdmap marks " << *q << " as removed" << dendl; 221 all_purged[id].push_back(*q); 222 } else { 223 all_purge[id].push_back(*q); 224 } 225 } 226 } 227 mds->objecter->put_osdmap_read(); 228 229 if (!all_purged.empty()) { //已经purge,调用do_server_update更新mdlog 230 // prepare to remove from need_to_purge list 231 bufferlist bl; 232 ::encode(all_purged, bl); 233 do_server_update(bl); 234 } 235 236 if (!all_purge.empty()) { //需要purge,创建消息存储all_purge并发送给mon 237 dout(10) << "requesting removal of " << all_purge << dendl; 238 MRemoveSnaps *m = new MRemoveSnaps(all_purge); 239 mds->monc->send_mon_message(m); 240 } 241 242 last_checked_osdmap = version; 243 } 244 245 246 247 void SnapServer::dump(Formatter *f) const 248 { 249 f->open_object_section("snapserver"); 250 251 f->dump_int("last_snap", last_snap.val); 252 253 f->open_array_section("pending_noop"); 254 for(set<version_t>::const_iterator i = pending_noop.begin(); i != pending_noop.end(); ++i) { 255 f->dump_unsigned("version", *i); 256 } 257 f->close_section(); 258 259 f->open_array_section("snaps"); 260 for (map<snapid_t, SnapInfo>::const_iterator i = snaps.begin(); i != snaps.end(); ++i) { 261 f->open_object_section("snap"); 262 i->second.dump(f); 263 f->close_section(); 264 } 265 f->close_section(); 266 267 f->open_object_section("need_to_purge"); 268 for (map<int, set<snapid_t> >::const_iterator i = need_to_purge.begin(); i != need_to_purge.end(); ++i) { 269 stringstream pool_id; 270 pool_id << i->first; 271 f->open_array_section(pool_id.str().c_str()); 272 for (set<snapid_t>::const_iterator s = i->second.begin(); s != i->second.end(); ++s) { 273 f->dump_unsigned("snapid", s->val); 274 } 275 f->close_section(); 276 } 277 f->close_section(); 278 279 
f->open_array_section("pending_create"); 280 for(map<version_t, SnapInfo>::const_iterator i = pending_create.begin(); i != pending_create.end(); ++i) { 281 f->open_object_section("snap"); 282 f->dump_unsigned("version", i->first); 283 f->open_object_section("snapinfo"); 284 i->second.dump(f); 285 f->close_section(); 286 f->close_section(); 287 } 288 f->close_section(); 289 290 f->open_array_section("pending_destroy"); 291 for(map<version_t, pair<snapid_t, snapid_t> >::const_iterator i = pending_destroy.begin(); i != pending_destroy.end(); ++i) { 292 f->open_object_section("snap"); 293 f->dump_unsigned("version", i->first); 294 f->dump_unsigned("removed_snap", i->second.first); 295 f->dump_unsigned("seq", i->second.second); 296 f->close_section(); 297 } 298 f->close_section(); 299 300 f->close_section(); 301 } 302 303 void SnapServer::generate_test_instances(list<SnapServer*>& ls) 304 { 305 list<SnapInfo*> snapinfo_instances; 306 SnapInfo::generate_test_instances(snapinfo_instances); 307 SnapInfo populated_snapinfo = *(snapinfo_instances.back()); 308 for (list<SnapInfo*>::iterator i = snapinfo_instances.begin(); i != snapinfo_instances.end(); ++i) { 309 delete *i; 310 } 311 312 SnapServer *blank = new SnapServer(); 313 ls.push_back(blank); 314 SnapServer *populated = new SnapServer(); 315 populated->last_snap = 123; 316 populated->snaps[456] = populated_snapinfo; 317 populated->need_to_purge[2].insert(012); 318 populated->pending_create[234] = populated_snapinfo; 319 populated->pending_destroy[345].first = 567; 320 populated->pending_destroy[345].second = 768; 321 populated->pending_noop.insert(890); 322 323 ls.push_back(populated); 324 325 }
snapmapper.h中有一段注释如下,
/**
* SnapMapper
*
* Manages two mappings:
* 1) hobject_t -> {snapid} //为每个克隆对象存储快照集
* 2) snapid -> {hobject_t} //为每个快照存储属于该快照的对象集合
*
* We accomplish this using two sets of keys:
* 1) OBJECT_PREFIX + obj.str() -> encoding of object_snaps
* 2) MAPPING_PREFIX + snapid_t + obj.str() -> encoding of pair<snapid_t, obj>
*
* The on disk strings and encodings are implemented in to_raw, to_raw_key,
* from_raw, to_object_key.
*
* The object -> {snapid} mapping is primarily included so that the
* SnapMapper state can be verified against the external PG state during
* scrub etc.
*
* The 2) mapping is arranged such that all objects in a particular
* snap will sort together, and so that all objects in a pg for a
* particular snap will group under up to 8 prefixes. //对某个pg而言,要找出某个快照下的所有对象,最多只需检查8个前缀
*/
snapmapper.cc:
1 #include "SnapMapper.h" 2 3 #define dout_subsys ceph_subsys_osd 4 #undef dout_prefix 5 #define dout_prefix *_dout << "snap_mapper." 6 7 using std::string; 8 9 const string SnapMapper::MAPPING_PREFIX = "MAP_"; 10 const string SnapMapper::OBJECT_PREFIX = "OBJ_"; 11 12 int OSDriver::get_keys( 13 const std::set<std::string> &keys, 14 std::map<std::string, bufferlist> *out) 15 { 16 return os->omap_get_values(cid, hoid, keys, out); 17 } 18 19 int OSDriver::get_next( 20 const std::string &key, 21 pair<std::string, bufferlist> *next) 22 { 23 ObjectMap::ObjectMapIterator iter = 24 os->get_omap_iterator(cid, hoid); 25 if (!iter) { 26 assert(0); 27 return -EINVAL; 28 } 29 iter->upper_bound(key); 30 if (iter->valid()) { 31 if (next) 32 *next = make_pair(iter->key(), iter->value()); 33 return 0; 34 } else { 35 return -ENOENT; 36 } 37 } 38 39 struct Mapping { 40 snapid_t snap; 41 hobject_t hoid; 42 Mapping(const pair<snapid_t, hobject_t> &in) 43 : snap(in.first), hoid(in.second) {} 44 Mapping() : snap(0) {} 45 void encode(bufferlist &bl) const { 46 ENCODE_START(1, 1, bl); 47 ::encode(snap, bl); 48 ::encode(hoid, bl); 49 ENCODE_FINISH(bl); 50 } 51 void decode(bufferlist::iterator &bl) { 52 DECODE_START(1, bl); 53 ::decode(snap, bl); 54 ::decode(hoid, bl); 55 DECODE_FINISH(bl); 56 } 57 }; 58 WRITE_CLASS_ENCODER(Mapping) 59 60 string SnapMapper::get_prefix(snapid_t snap) 61 { 62 char buf[100]; 63 int len = snprintf( 64 buf, sizeof(buf), 65 "%.*X_", (int)(sizeof(snap)*2), 66 static_cast<unsigned>(snap)); 67 return MAPPING_PREFIX + string(buf, len); 68 } 69 70 string SnapMapper::to_raw_key( //实现mapping有两种key,此为raw_key 71 const pair<snapid_t, hobject_t> &in) 72 { 73 return get_prefix(in.first) + shard_prefix + in.second.to_str(); 74 } 75 76 pair<string, bufferlist> SnapMapper::to_raw( //返回键值和对应map 77 const pair<snapid_t, hobject_t> &in) 78 { 79 bufferlist bl; 80 ::encode(Mapping(in), bl); 81 return make_pair( 82 to_raw_key(in), 83 bl); 84 } 85 86 pair<snapid_t, hobject_t> 
SnapMapper::from_raw( //根据map返回snapid和hobject对应的snap和hoid 与to_raw相反 87 const pair<std::string, bufferlist> &image) 88 { 89 Mapping map; 90 bufferlist bl(image.second); 91 bufferlist::iterator bp(bl.begin()); 92 ::decode(map, bp); 93 return make_pair(map.snap, map.hoid); 94 } 95 96 bool SnapMapper::is_mapping(const string &to_test) 97 { 98 return to_test.substr(0, MAPPING_PREFIX.size()) == MAPPING_PREFIX; 99 } 100 101 string SnapMapper::to_object_key(const hobject_t &hoid) //实现mapping有两种key,此为object_key 102 { 103 return OBJECT_PREFIX + shard_prefix + hoid.to_str(); 104 } 105 106 void SnapMapper::object_snaps::encode(bufferlist &bl) const 107 { 108 ENCODE_START(1, 1, bl); 109 ::encode(oid, bl); 110 ::encode(snaps, bl); 111 ENCODE_FINISH(bl); 112 } 113 114 void SnapMapper::object_snaps::decode(bufferlist::iterator &bl) 115 { 116 DECODE_START(1, bl); 117 ::decode(oid, bl); 118 ::decode(snaps, bl); 119 DECODE_FINISH(bl); 120 } 121 122 int SnapMapper::get_snaps( //根据iod从backend中找到对应的snaps并输出 123 const hobject_t &oid, 124 object_snaps *out) 125 { 126 assert(check(oid)); 127 set<string> keys; 128 map<string, bufferlist> got; 129 keys.insert(to_object_key(oid)); 130 int r = backend.get_keys(keys, &got); 131 if (r < 0) 132 return r; 133 if (got.empty()) 134 return -ENOENT; 135 if (out) { 136 bufferlist::iterator bp = got.begin()->second.begin(); 137 ::decode(*out, bp); 138 dout(20) << __func__ << " " << oid << " " << out->snaps << dendl; 139 assert(!out->snaps.empty()); 140 } else { 141 dout(20) << __func__ << " " << oid << " (out == NULL)" << dendl; 142 } 143 return 0; 144 } 145 146 void SnapMapper::clear_snaps( //remove to_object_key 147 const hobject_t &oid, 148 MapCacher::Transaction<std::string, bufferlist> *t) 149 { 150 assert(check(oid)); 151 set<string> to_remove; 152 to_remove.insert(to_object_key(oid)); 153 backend.remove_keys(to_remove, t); 154 } 155 156 void SnapMapper::set_snaps( //将object_key和snap设置为一个map,并调用set_keys加入到Transaction结构中 157 const hobject_t &oid, 
158 const object_snaps &in, 159 MapCacher::Transaction<std::string, bufferlist> *t) 160 { 161 assert(check(oid)); 162 map<string, bufferlist> to_set; 163 bufferlist bl; 164 ::encode(in, bl); 165 to_set[to_object_key(oid)] = bl; 166 backend.set_keys(to_set, t); 167 } 168 169 int SnapMapper::update_snaps( //将new_snaps更新加入backend中 170 const hobject_t &oid, 171 const set<snapid_t> &new_snaps, 172 const set<snapid_t> *old_snaps_check, 173 MapCacher::Transaction<std::string, bufferlist> *t) 174 { 175 dout(20) << __func__ << " " << oid << " " << new_snaps 176 << " was " << (old_snaps_check ? *old_snaps_check : set<snapid_t>()) 177 << dendl; 178 assert(check(oid)); 179 if (new_snaps.empty()) 180 return remove_oid(oid, t); 181 182 object_snaps out; 183 int r = get_snaps(oid, &out); 184 if (r < 0) 185 return r; 186 if (old_snaps_check) 187 assert(out.snaps == *old_snaps_check); 188 189 object_snaps in(oid, new_snaps); 190 set_snaps(oid, in, t); 191 192 set<string> to_remove; 193 for (set<snapid_t>::iterator i = out.snaps.begin(); 194 i != out.snaps.end(); 195 ++i) { 196 if (!new_snaps.count(*i)) { 197 to_remove.insert(to_raw_key(make_pair(*i, oid))); 198 } 199 } 200 backend.remove_keys(to_remove, t); 201 return 0; 202 } 203 204 void SnapMapper::add_oid( 205 const hobject_t &oid, 206 const set<snapid_t>& snaps, 207 MapCacher::Transaction<std::string, bufferlist> *t) 208 { 209 dout(20) << __func__ << " " << oid << " " << snaps << dendl; 210 assert(check(oid)); 211 { 212 object_snaps out; 213 int r = get_snaps(oid, &out); 214 assert(r == -ENOENT); 215 } 216 217 object_snaps _snaps(oid, snaps); 218 set_snaps(oid, _snaps, t); 219 220 map<string, bufferlist> to_add; 221 for (set<snapid_t>::iterator i = snaps.begin(); 222 i != snaps.end(); 223 ++i) { 224 to_add.insert(to_raw(make_pair(*i, oid))); 225 } 226 backend.set_keys(to_add, t); //将to_add和t打包加入backend 227 } 228 229 int SnapMapper::get_next_object_to_trim( 230 snapid_t snap, 231 hobject_t *hoid) 232 { 233 for 
(set<string>::iterator i = prefixes.begin(); 234 i != prefixes.end(); 235 ++i) { 236 string list_after(get_prefix(snap) + *i); 237 238 pair<string, bufferlist> next; 239 int r = backend.get_next(list_after, &next); 240 if (r < 0) { 241 break; // Done 242 } 243 244 if (next.first.substr(0, list_after.size()) != 245 list_after) { 246 continue; // Done with this prefix 247 } 248 249 assert(is_mapping(next.first)); 250 251 pair<snapid_t, hobject_t> next_decoded(from_raw(next)); 252 assert(next_decoded.first == snap); 253 assert(check(next_decoded.second)); 254 255 if (hoid) 256 *hoid = next_decoded.second; 257 return 0; 258 } 259 return -ENOENT; 260 } 261 262 263 int SnapMapper::remove_oid( 264 const hobject_t &oid, 265 MapCacher::Transaction<std::string, bufferlist> *t) 266 { 267 dout(20) << __func__ << " " << oid << dendl; 268 assert(check(oid)); 269 return _remove_oid(oid, t); 270 } 271 272 int SnapMapper::_remove_oid( 273 const hobject_t &oid, 274 MapCacher::Transaction<std::string, bufferlist> *t) 275 { 276 object_snaps out; 277 int r = get_snaps(oid, &out); 278 if (r < 0) 279 return r; 280 281 clear_snaps(oid, t); //remove to_object_key 282 283 set<string> to_remove; 284 for (set<snapid_t>::iterator i = out.snaps.begin(); 285 i != out.snaps.end(); 286 ++i) { 287 to_remove.insert(to_raw_key(make_pair(*i, oid))); 288 } 289 backend.remove_keys(to_remove, t); //remove to_raw_key 290 return 0; 291 } 292 293 int SnapMapper::get_snaps( //根据oid找到对应snaps并存在set容器snaps中 294 const hobject_t &oid, 295 std::set<snapid_t> *snaps) 296 { 297 assert(check(oid)); 298 object_snaps out; 299 int r = get_snaps(oid, &out); 300 if (r < 0) 301 return r; 302 if (snaps) 303 snaps->swap(out.snaps); 304 return 0; 305 }
原文:http://www.cnblogs.com/noblemore/p/4954330.html