ip_queue_xmit是ip层提供给tcp层的发送回调,大多数tcp发送都会使用这个回调,tcp层使用tcp_transmit_skb封装了tcp头之后,调用该函数,该函数提供了路由查找校验、封装ip头和ip选项的功能,封装完成之后调用ip_local_out发送数据包;
ip_build_and_send_pkt函数是服务器端在给客户端回复syn+ack时调用的,该函数在构造ip头之后,调用ip_local_out发送数据包;
ip_send_unicast_reply函数目前只用于发送ACK和RST,该函数根据对端发过来的skb构造ip头,然后调用ip_append_data向发送队列中附加/新增数据,最后调用ip_push_pending_frames发送数据包;
1 /* Note: skb->sk can be different from sk, in case of tunnels */ 2 int ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl) 3 { 4 struct inet_sock *inet = inet_sk(sk); 5 struct net *net = sock_net(sk); 6 struct ip_options_rcu *inet_opt; 7 struct flowi4 *fl4; 8 struct rtable *rt; 9 struct iphdr *iph; 10 int res; 11 12 /* Skip all of this if the packet is already routed, 13 * f.e. by something like SCTP. 14 */ 15 rcu_read_lock(); 16 inet_opt = rcu_dereference(inet->inet_opt); 17 fl4 = &fl->u.ip4; 18 19 /* 获取skb中的路由缓存 */ 20 rt = skb_rtable(skb); 21 22 /* skb中有缓存则跳转处理 */ 23 if (rt) 24 goto packet_routed; 25 26 /* Make sure we can route this packet. */ 27 /* 检查控制块中的路由缓存 */ 28 rt = (struct rtable *)__sk_dst_check(sk, 0); 29 /* 缓存过期 */ 30 if (!rt) { 31 __be32 daddr; 32 33 /* Use correct destination address if we have options. */ 34 /* 目的地址 */ 35 daddr = inet->inet_daddr; 36 37 /* 严格路由选项 */ 38 if (inet_opt && inet_opt->opt.srr) 39 daddr = inet_opt->opt.faddr; 40 41 /* If this fails, retransmit mechanism of transport layer will 42 * keep trying until route appears or the connection times 43 * itself out. 44 */ 45 /* 查找路由缓存 */ 46 rt = ip_route_output_ports(net, fl4, sk, 47 daddr, inet->inet_saddr, 48 inet->inet_dport, 49 inet->inet_sport, 50 sk->sk_protocol, 51 RT_CONN_FLAGS(sk), 52 sk->sk_bound_dev_if); 53 /* 失败 */ 54 if (IS_ERR(rt)) 55 goto no_route; 56 57 /* 设置控制块的路由缓存 */ 58 sk_setup_caps(sk, &rt->dst); 59 } 60 61 /* 将路由设置到skb中 */ 62 skb_dst_set_noref(skb, &rt->dst); 63 64 packet_routed: 65 /* 严格路由选项 &&使用网关,无路由 */ 66 if (inet_opt && inet_opt->opt.is_strictroute && rt->rt_uses_gateway) 67 goto no_route; 68 69 /* OK, we know where to send it, allocate and build IP header. */ 70 /* 加入ip头 */ 71 skb_push(skb, sizeof(struct iphdr) + (inet_opt ? 
inet_opt->opt.optlen : 0)); 72 skb_reset_network_header(skb); 73 74 /* 构造ip头 */ 75 iph = ip_hdr(skb); 76 *((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff)); 77 if (ip_dont_fragment(sk, &rt->dst) && !skb->ignore_df) 78 iph->frag_off = htons(IP_DF); 79 else 80 iph->frag_off = 0; 81 iph->ttl = ip_select_ttl(inet, &rt->dst); 82 iph->protocol = sk->sk_protocol; 83 ip_copy_addrs(iph, fl4); 84 85 /* Transport layer set skb->h.foo itself. */ 86 /* 构造ip选项 */ 87 if (inet_opt && inet_opt->opt.optlen) { 88 iph->ihl += inet_opt->opt.optlen >> 2; 89 ip_options_build(skb, &inet_opt->opt, inet->inet_daddr, rt, 0); 90 } 91 92 /* 设置id */ 93 ip_select_ident_segs(net, skb, sk, 94 skb_shinfo(skb)->gso_segs ?: 1); 95 96 /* TODO : should we use skb->sk here instead of sk ? */ 97 /* QOS等级 */ 98 skb->priority = sk->sk_priority; 99 skb->mark = sk->sk_mark; 100 101 /* 输出 */ 102 res = ip_local_out(net, sk, skb); 103 rcu_read_unlock(); 104 return res; 105 106 no_route: 107 /* 无路由处理 */ 108 rcu_read_unlock(); 109 IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES); 110 kfree_skb(skb); 111 return -EHOSTUNREACH; 112 }
1 int ip_build_and_send_pkt(struct sk_buff *skb, const struct sock *sk, 2 __be32 saddr, __be32 daddr, struct ip_options_rcu *opt) 3 { 4 struct inet_sock *inet = inet_sk(sk); 5 struct rtable *rt = skb_rtable(skb); 6 struct net *net = sock_net(sk); 7 struct iphdr *iph; 8 9 /* Build the IP header. */ 10 /* 构造ip头 */ 11 skb_push(skb, sizeof(struct iphdr) + (opt ? opt->opt.optlen : 0)); 12 skb_reset_network_header(skb); 13 iph = ip_hdr(skb); 14 iph->version = 4; 15 iph->ihl = 5; 16 iph->tos = inet->tos; 17 iph->ttl = ip_select_ttl(inet, &rt->dst); 18 iph->daddr = (opt && opt->opt.srr ? opt->opt.faddr : daddr); 19 iph->saddr = saddr; 20 iph->protocol = sk->sk_protocol; 21 22 /* 分片与否 */ 23 if (ip_dont_fragment(sk, &rt->dst)) { 24 iph->frag_off = htons(IP_DF); 25 iph->id = 0; 26 } else { 27 iph->frag_off = 0; 28 __ip_select_ident(net, iph, 1); 29 } 30 31 /* 选项 */ 32 if (opt && opt->opt.optlen) { 33 iph->ihl += opt->opt.optlen>>2; 34 ip_options_build(skb, &opt->opt, daddr, rt, 0); 35 } 36 37 /* QOS优先级 */ 38 skb->priority = sk->sk_priority; 39 skb->mark = sk->sk_mark; 40 41 /* Send it out. */ 42 /* 输出 */ 43 return ip_local_out(net, skb->sk, skb); 44 }
1 void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, 2 const struct ip_options *sopt, 3 __be32 daddr, __be32 saddr, 4 const struct ip_reply_arg *arg, 5 unsigned int len) 6 { 7 struct ip_options_data replyopts; 8 struct ipcm_cookie ipc; 9 struct flowi4 fl4; 10 struct rtable *rt = skb_rtable(skb); 11 struct net *net = sock_net(sk); 12 struct sk_buff *nskb; 13 int err; 14 int oif; 15 16 /* 获取ip选项 */ 17 if (__ip_options_echo(&replyopts.opt.opt, skb, sopt)) 18 return; 19 20 ipc.addr = daddr; 21 ipc.opt = NULL; 22 ipc.tx_flags = 0; 23 ipc.ttl = 0; 24 ipc.tos = -1; 25 26 /* 选项存在 */ 27 if (replyopts.opt.opt.optlen) { 28 ipc.opt = &replyopts.opt; 29 30 /* 源路由存在,设置下一跳ip地址为目的地址 */ 31 if (replyopts.opt.opt.srr) 32 daddr = replyopts.opt.opt.faddr; 33 } 34 35 /* 输出接口设置 */ 36 oif = arg->bound_dev_if; 37 if (!oif && netif_index_is_l3_master(net, skb->skb_iif)) 38 oif = skb->skb_iif; 39 40 /* 查路由 */ 41 flowi4_init_output(&fl4, oif, 42 IP4_REPLY_MARK(net, skb->mark), 43 RT_TOS(arg->tos), 44 RT_SCOPE_UNIVERSE, ip_hdr(skb)->protocol, 45 ip_reply_arg_flowi_flags(arg), 46 daddr, saddr, 47 tcp_hdr(skb)->source, tcp_hdr(skb)->dest, 48 arg->uid); 49 security_skb_classify_flow(skb, flowi4_to_flowi(&fl4)); 50 rt = ip_route_output_key(net, &fl4); 51 if (IS_ERR(rt)) 52 return; 53 54 55 /* 根据skb更新sk的属性 */ 56 inet_sk(sk)->tos = arg->tos; 57 58 sk->sk_priority = skb->priority; 59 sk->sk_protocol = ip_hdr(skb)->protocol; 60 sk->sk_bound_dev_if = arg->bound_dev_if; 61 sk->sk_sndbuf = sysctl_wmem_default; 62 sk->sk_mark = fl4.flowi4_mark; 63 /* 数据追加到前一个skb或者新建skb后添加到发送队列 */ 64 err = ip_append_data(sk, &fl4, ip_reply_glue_bits, arg->iov->iov_base, 65 len, 0, &ipc, &rt, MSG_DONTWAIT); 66 if (unlikely(err)) { 67 ip_flush_pending_frames(sk); 68 goto out; 69 } 70 71 /* 如果发送队列有skb,则计算校验和,发送 */ 72 nskb = skb_peek(&sk->sk_write_queue); 73 if (nskb) { 74 if (arg->csumoffset >= 0) 75 *((__sum16 *)skb_transport_header(nskb) + 76 arg->csumoffset) = csum_fold(csum_add(nskb->csum, 77 arg->csum)); 78 
nskb->ip_summed = CHECKSUM_NONE; 79 80 /* 发送数据包 */ 81 ip_push_pending_frames(sk, &fl4); 82 } 83 out: 84 ip_rt_put(rt); 85 }
TCP->IP输出 之 ip_queue_xmit、ip_build_and_send_pkt、ip_send_unicast_reply
原文:https://www.cnblogs.com/wanpengcoder/p/11755349.html