ipv4_conntrack_defrag对输入包进行检查,如果是分片包,则调用nf_ct_ipv4_gather_frags函数进行重组:
1 static unsigned int ipv4_conntrack_defrag(void *priv, 2 struct sk_buff *skb, 3 const struct nf_hook_state *state) 4 { 5 struct sock *sk = skb->sk; 6 7 if (sk && sk_fullsock(sk) && (sk->sk_family == PF_INET) && 8 inet_sk(sk)->nodefrag) 9 return NF_ACCEPT; 10 11 #if IS_ENABLED(CONFIG_NF_CONNTRACK) 12 #if !IS_ENABLED(CONFIG_NF_NAT) 13 /* Previously seen (loopback)? Ignore. Do this before 14 fragment check. */ 15 if (skb_nfct(skb) && !nf_ct_is_template((struct nf_conn *)skb_nfct(skb))) 16 return NF_ACCEPT; 17 #endif 18 #endif 19 /* Gather fragments. */ 20 /* 如果是分片的话进行分片重组 */ 21 if (ip_is_fragment(ip_hdr(skb))) { 22 enum ip_defrag_users user = 23 nf_ct_defrag_user(state->hook, skb); 24 25 if (nf_ct_ipv4_gather_frags(state->net, skb, user)) 26 return NF_STOLEN; 27 } 28 return NF_ACCEPT; 29 }
nf_ct_ipv4_gather_frags内部调用ip_defrag完成重组;ip_defrag的具体分析请参见《IP分片重组》一文:
1 static int nf_ct_ipv4_gather_frags(struct net *net, struct sk_buff *skb, 2 u_int32_t user) 3 { 4 int err; 5 6 local_bh_disable(); 7 /* 分片重组 */ 8 err = ip_defrag(net, skb, user); 9 local_bh_enable(); 10 11 if (!err) 12 skb->ignore_df = 1; 13 14 return err; 15 }
ipv4_conntrack_in是对nf_conntrack_in的封装,是发往本机或经本机转发的数据包进入连接跟踪的入口函数。nf_conntrack_in先获取l3proto和l4proto,再调用resolve_normal_ct在hash表中查找与数据包tuple对应的连接,没有则新建,并将连接与skb关联;最后调用l4proto->packet函数对连接状态进行处理:
1 static unsigned int ipv4_conntrack_in(void *priv, 2 struct sk_buff *skb, 3 const struct nf_hook_state *state) 4 { 5 return nf_conntrack_in(state->net, PF_INET, state->hook, skb); 6 }
1 unsigned int 2 nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, 3 struct sk_buff *skb) 4 { 5 struct nf_conn *ct, *tmpl; 6 enum ip_conntrack_info ctinfo; 7 struct nf_conntrack_l3proto *l3proto; 8 struct nf_conntrack_l4proto *l4proto; 9 unsigned int *timeouts; 10 unsigned int dataoff; 11 u_int8_t protonum; 12 int ret; 13 14 /* 获取skb关联的nf_conn */ 15 tmpl = nf_ct_get(skb, &ctinfo); 16 17 /* 已经关联了nf_conn或者设置了不跟踪标记 */ 18 if (tmpl || ctinfo == IP_CT_UNTRACKED) { 19 /* Previously seen (loopback or untracked)? Ignore. */ 20 /* 环回 || 不跟踪,返回accept */ 21 if ((tmpl && !nf_ct_is_template(tmpl)) || 22 ctinfo == IP_CT_UNTRACKED) { 23 NF_CT_STAT_INC_ATOMIC(net, ignore); 24 return NF_ACCEPT; 25 } 26 27 /* 清空关联的nf_conn */ 28 skb->_nfct = 0; 29 } 30 31 /* rcu_read_lock()ed by nf_hook_thresh */ 32 /* 根据协议类型找到对应协议的l3proto */ 33 l3proto = __nf_ct_l3proto_find(pf); 34 35 /* 获取数据偏移和4层协议 */ 36 ret = l3proto->get_l4proto(skb, skb_network_offset(skb), 37 &dataoff, &protonum); 38 if (ret <= 0) { 39 pr_debug("not prepared to track yet or error occurred\n"); 40 NF_CT_STAT_INC_ATOMIC(net, error); 41 NF_CT_STAT_INC_ATOMIC(net, invalid); 42 ret = -ret; 43 goto out; 44 } 45 46 /* 根据协议和4层协议号获取l4proto */ 47 l4proto = __nf_ct_l4proto_find(pf, protonum); 48 49 /* It may be an special packet, error, unclean... 50 * inverse of the return code tells to the netfilter 51 * core what to do with the packet. */ 52 /* 如果l4设置了错误检查函数,则进行检查 */ 53 if (l4proto->error != NULL) { 54 ret = l4proto->error(net, tmpl, skb, dataoff, pf, hooknum); 55 if (ret <= 0) { 56 NF_CT_STAT_INC_ATOMIC(net, error); 57 NF_CT_STAT_INC_ATOMIC(net, invalid); 58 ret = -ret; 59 goto out; 60 } 61 /* ICMP[v6] protocol trackers may assign one conntrack. */ 62 if (skb->_nfct) 63 goto out; 64 } 65 repeat: 66 /* 查看hash中是否有对应tuple节点,没有则新建;更新nf_conn_info状态,并且与skb进行关联 */ 67 ret = resolve_normal_ct(net, tmpl, skb, dataoff, pf, protonum, 68 l3proto, l4proto); 69 if (ret < 0) { 70 /* Too stressed to deal. 
*/ 71 NF_CT_STAT_INC_ATOMIC(net, drop); 72 ret = NF_DROP; 73 goto out; 74 } 75 76 /* 获取skb关联的nf_conn */ 77 ct = nf_ct_get(skb, &ctinfo); 78 /* 没有关联的nf_conn,不是连接合法的一部分 */ 79 if (!ct) { 80 /* Not valid part of a connection */ 81 NF_CT_STAT_INC_ATOMIC(net, invalid); 82 ret = NF_ACCEPT; 83 goto out; 84 } 85 86 /* Decide what timeout policy we want to apply to this flow. */ 87 /* 获取超时策略,扩展中的策略,或者默认l4proto中的策略 */ 88 timeouts = nf_ct_timeout_lookup(net, ct, l4proto); 89 90 /* 处理4层协议的状态,tcp为tcp_packet */ 91 ret = l4proto->packet(ct, skb, dataoff, ctinfo, pf, hooknum, timeouts); 92 if (ret <= 0) { 93 /* Invalid: inverse of the return code tells 94 * the netfilter core what to do */ 95 pr_debug("nf_conntrack_in: Can‘t track with proto module\n"); 96 nf_conntrack_put(&ct->ct_general); 97 skb->_nfct = 0; 98 NF_CT_STAT_INC_ATOMIC(net, invalid); 99 if (ret == -NF_DROP) 100 NF_CT_STAT_INC_ATOMIC(net, drop); 101 /* Special case: TCP tracker reports an attempt to reopen a 102 * closed/aborted connection. We have to go back and create a 103 * fresh conntrack. 104 */ 105 if (ret == -NF_REPEAT) 106 goto repeat; 107 ret = -ret; 108 goto out; 109 } 110 111 /* 第一次收到应答,则设置IPS_SEEN_REPLY_BIT标记,原值为0,则需要记录应答事件 */ 112 if (ctinfo == IP_CT_ESTABLISHED_REPLY && 113 !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status)) 114 nf_conntrack_event_cache(IPCT_REPLY, ct); 115 out: 116 if (tmpl) 117 nf_ct_put(tmpl); 118 119 return ret; 120 }
resolve_normal_ct函数先将数据包中的相关字段提取到tuple中,再在hash表中查找该tuple;如果没有找到,则调用init_conntrack新建连接。随后根据数据包方向和连接状态确定ctinfo,并将连接与skb进行关联:
1 static int 2 resolve_normal_ct(struct net *net, struct nf_conn *tmpl, 3 struct sk_buff *skb, 4 unsigned int dataoff, 5 u_int16_t l3num, 6 u_int8_t protonum, 7 struct nf_conntrack_l3proto *l3proto, 8 struct nf_conntrack_l4proto *l4proto) 9 { 10 const struct nf_conntrack_zone *zone; 11 struct nf_conntrack_tuple tuple; 12 struct nf_conntrack_tuple_hash *h; 13 enum ip_conntrack_info ctinfo; 14 struct nf_conntrack_zone tmp; 15 struct nf_conn *ct; 16 u32 hash; 17 18 /* 将源目的地址端口协议方向等字段设置到tuple */ 19 if (!nf_ct_get_tuple(skb, skb_network_offset(skb), 20 dataoff, l3num, protonum, net, &tuple, l3proto, 21 l4proto)) { 22 pr_debug("Can‘t get tuple\n"); 23 return 0; 24 } 25 26 /* look for tuple match */ 27 /* 从hash中查找tuple */ 28 zone = nf_ct_zone_tmpl(tmpl, skb, &tmp); 29 hash = hash_conntrack_raw(&tuple, net); 30 h = __nf_conntrack_find_get(net, zone, &tuple, hash); 31 32 /* 未找到该tuple */ 33 if (!h) { 34 /* 创建一个节点 */ 35 h = init_conntrack(net, tmpl, &tuple, l3proto, l4proto, 36 skb, dataoff, hash); 37 if (!h) 38 return 0; 39 if (IS_ERR(h)) 40 return PTR_ERR(h); 41 } 42 43 /* 获取到nf_conn */ 44 ct = nf_ct_tuplehash_to_ctrack(h); 45 46 /* It exists; we have (non-exclusive) reference. */ 47 /* 应答方向,已建立连接应答 */ 48 if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) { 49 ctinfo = IP_CT_ESTABLISHED_REPLY; 50 } 51 /* 原始方向 */ 52 else { 53 /* Once we‘ve had two way comms, always ESTABLISHED. */ 54 /* 已经见过应答了,那么是已连接状态 */ 55 if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) { 56 pr_debug("normal packet for %p\n", ct); 57 ctinfo = IP_CT_ESTABLISHED; 58 } 59 /* 有期望连接标记,则设置关联字段 */ 60 else if (test_bit(IPS_EXPECTED_BIT, &ct->status)) { 61 pr_debug("related packet for %p\n", ct); 62 ctinfo = IP_CT_RELATED; 63 } 64 /* 其他情况,新连接 */ 65 else { 66 pr_debug("new packet for %p\n", ct); 67 ctinfo = IP_CT_NEW; 68 } 69 } 70 71 /* skb关联nf_conn */ 72 nf_ct_set(skb, ct, ctinfo); 73 return 0; 74 }
ipv4_conntrack_local是本机发出的数据包进入连接跟踪的入口,同样是对nf_conntrack_in函数的封装;长度不足的包和分片包不做跟踪,直接放行:
1 static unsigned int ipv4_conntrack_local(void *priv, 2 struct sk_buff *skb, 3 const struct nf_hook_state *state) 4 { 5 /* root is playing with raw sockets. */ 6 if (skb->len < sizeof(struct iphdr) || 7 ip_hdrlen(skb) < sizeof(struct iphdr)) 8 return NF_ACCEPT; 9 10 /* 分片,返回accpet */ 11 if (ip_is_fragment(ip_hdr(skb))) /* IP_NODEFRAG setsockopt set */ 12 return NF_ACCEPT; 13 14 /* 调用conntrack_in */ 15 return nf_conntrack_in(state->net, PF_INET, state->hook, skb); 16 }
ipv4_helper函数获取连接上的help扩展,如果该扩展中设置了helper,则调用该helper的help函数:
1 static unsigned int ipv4_helper(void *priv, 2 struct sk_buff *skb, 3 const struct nf_hook_state *state) 4 { 5 struct nf_conn *ct; 6 enum ip_conntrack_info ctinfo; 7 const struct nf_conn_help *help; 8 const struct nf_conntrack_helper *helper; 9 10 /* This is where we call the helper: as the packet goes out. */ 11 /* 获取skb关联的nf_conn */ 12 ct = nf_ct_get(skb, &ctinfo); 13 /* 未关联,或者是 已建立连接的关联连接的响应 */ 14 if (!ct || ctinfo == IP_CT_RELATED_REPLY) 15 return NF_ACCEPT; 16 17 /* 获取help扩展 */ 18 help = nfct_help(ct); 19 20 /* 没有扩展 */ 21 if (!help) 22 return NF_ACCEPT; 23 24 /* rcu_read_lock()ed by nf_hook_thresh */ 25 /* 或者helper */ 26 helper = rcu_dereference(help->helper); 27 if (!helper) 28 return NF_ACCEPT; 29 30 /* 执行扩展的help函数 */ 31 return helper->help(skb, skb_network_offset(skb) + ip_hdrlen(skb), 32 ct, ctinfo); 33 }
ipv4_confirm及其调用的nf_conntrack_confirm、__nf_conntrack_confirm完成对连接的确认,并将连接原始方向和应答方向的tuple节点分别插入hash表的对应链中:
1 static unsigned int ipv4_confirm(void *priv, 2 struct sk_buff *skb, 3 const struct nf_hook_state *state) 4 { 5 struct nf_conn *ct; 6 enum ip_conntrack_info ctinfo; 7 8 /* 获取skb关联的nf_conn */ 9 ct = nf_ct_get(skb, &ctinfo); 10 /* 未关联,或者是 已建立连接的关联连接的响应 */ 11 if (!ct || ctinfo == IP_CT_RELATED_REPLY) 12 goto out; 13 14 /* adjust seqs for loopback traffic only in outgoing direction */ 15 /* 有调整序号标记,且不是环回包,调整序号 */ 16 if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) && 17 !nf_is_loopback_packet(skb)) { 18 if (!nf_ct_seq_adjust(skb, ct, ctinfo, ip_hdrlen(skb))) { 19 NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop); 20 return NF_DROP; 21 } 22 } 23 out: 24 /* We‘ve seen it coming out the other side: confirm it */ 25 /* 调用conntrack_confirm */ 26 return nf_conntrack_confirm(skb); 27 }
1 static inline int nf_conntrack_confirm(struct sk_buff *skb) 2 { 3 struct nf_conn *ct = (struct nf_conn *)skb_nfct(skb); 4 int ret = NF_ACCEPT; 5 6 /* nf_conn存在 */ 7 if (ct) { 8 /* 未确认,则进行确认 */ 9 if (!nf_ct_is_confirmed(ct)) 10 ret = __nf_conntrack_confirm(skb); 11 /* accpet状态事件通知 */ 12 if (likely(ret == NF_ACCEPT)) 13 nf_ct_deliver_cached_events(ct); 14 } 15 return ret; 16 }
1 int 2 __nf_conntrack_confirm(struct sk_buff *skb) 3 { 4 const struct nf_conntrack_zone *zone; 5 unsigned int hash, reply_hash; 6 struct nf_conntrack_tuple_hash *h; 7 struct nf_conn *ct; 8 struct nf_conn_help *help; 9 struct nf_conn_tstamp *tstamp; 10 struct hlist_nulls_node *n; 11 enum ip_conntrack_info ctinfo; 12 struct net *net; 13 unsigned int sequence; 14 int ret = NF_DROP; 15 16 ct = nf_ct_get(skb, &ctinfo); 17 net = nf_ct_net(ct); 18 19 /* ipt_REJECT uses nf_conntrack_attach to attach related 20 ICMP/TCP RST packets in other direction. Actual packet 21 which created connection will be IP_CT_NEW or for an 22 expected connection, IP_CT_RELATED. */ 23 /* 只对原始方向的连接进行确认,应答方向是已经处理过的 */ 24 if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) 25 return NF_ACCEPT; 26 27 zone = nf_ct_zone(ct); 28 local_bh_disable(); 29 30 /* 计算原始方向和应答方向的hash */ 31 do { 32 sequence = read_seqcount_begin(&nf_conntrack_generation); 33 /* reuse the hash saved before */ 34 hash = *(unsigned long *)&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev; 35 hash = scale_hash(hash); 36 reply_hash = hash_conntrack(net, 37 &ct->tuplehash[IP_CT_DIR_REPLY].tuple); 38 39 } while (nf_conntrack_double_lock(net, hash, reply_hash, sequence)); 40 41 /* We‘re not in hash table, and we refuse to set up related 42 * connections for unconfirmed conns. But packet copies and 43 * REJECT will give spurious warnings here. 44 */ 45 /* NF_CT_ASSERT(atomic_read(&ct->ct_general.use) == 1); */ 46 47 /* No external references means no one else could have 48 * confirmed us. 49 */ 50 NF_CT_ASSERT(!nf_ct_is_confirmed(ct)); 51 pr_debug("Confirming conntrack %p\n", ct); 52 /* We have to check the DYING flag after unlink to prevent 53 * a race against nf_ct_get_next_corpse() possibly called from 54 * user context, else we insert an already ‘dead‘ hash, blocking 55 * further use of that particular connection -JM. 
56 */ 57 nf_ct_del_from_dying_or_unconfirmed_list(ct); 58 59 if (unlikely(nf_ct_is_dying(ct))) { 60 nf_ct_add_to_dying_list(ct); 61 goto dying; 62 } 63 64 /* See if there‘s one in the list already, including reverse: 65 NAT could have grabbed it without realizing, since we‘re 66 not in the hash. If there is, we lost race. */ 67 68 /* 下面两个如果找到说明有冲突 */ 69 70 /* 遍历原始方向hash,查找是否有相同节点 */ 71 hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[hash], hnnode) 72 if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, 73 zone, net)) 74 goto out; 75 76 /* 遍历应答方向hash,查找是否有相同节点 */ 77 hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[reply_hash], hnnode) 78 if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, 79 zone, net)) 80 goto out; 81 82 /* Timer relative to confirmation time, not original 83 setting time, otherwise we‘d get timer wrap in 84 weird delay cases. */ 85 /* 设置超时时间 */ 86 ct->timeout += nfct_time_stamp; 87 /* 引用计数增加 */ 88 atomic_inc(&ct->ct_general.use); 89 /* 更新为已确认 */ 90 ct->status |= IPS_CONFIRMED; 91 92 /* set conntrack timestamp, if enabled. */ 93 /* 有时间戳扩展,则设置时间戳 */ 94 tstamp = nf_conn_tstamp_find(ct); 95 if (tstamp) { 96 if (skb->tstamp == 0) 97 __net_timestamp(skb); 98 99 tstamp->start = ktime_to_ns(skb->tstamp); 100 } 101 /* Since the lookup is lockless, hash insertion must be done after 102 * starting the timer and setting the CONFIRMED bit. The RCU barriers 103 * guarantee that no other CPU can find the conntrack before the above 104 * stores are visible. 105 */ 106 /* 将原始节点和应答节点插入到对应的hash中 */ 107 __nf_conntrack_hash_insert(ct, hash, reply_hash); 108 nf_conntrack_double_unlock(hash, reply_hash); 109 local_bh_enable(); 110 111 /* 事件通知 */ 112 help = nfct_help(ct); 113 if (help && help->helper) 114 nf_conntrack_event_cache(IPCT_HELPER, ct); 115 116 nf_conntrack_event_cache(master_ct(ct) ? 117 IPCT_RELATED : IPCT_NEW, ct); 118 return NF_ACCEPT; 119 120 out: 121 /* 加入到dying列表 */ 122 nf_ct_add_to_dying_list(ct); 123 /* 解决冲突?? 
*/ 124 ret = nf_ct_resolve_clash(net, skb, ctinfo, h); 125 dying: 126 nf_conntrack_double_unlock(hash, reply_hash); 127 NF_CT_STAT_INC(net, insert_failed); 128 local_bh_enable(); 129 return ret; 130 }
原文:https://www.cnblogs.com/wanpengcoder/p/11755703.html