首页 > 其他 > 详细

深入理解TCP协议及其源代码

时间:2019-12-26 19:28:12      阅读:90      评论:0      收藏:0      [点我收藏+]

1.三次握手原理

技术分享图片

 

 

  • 第一次握手。如图,TCP双方建立连接时,首先由客户端发起连接请求,将TCP头部的SYN标志位置为1。请求中附带连接参数,包括随机生成的初始序列号Seq(防止传输时的字节序列号被预测而受到攻击)。
  • 第二次握手:当Server(服务器)分配资源打开监听请求,收到客户端请求后,对请求头进行解析。若连接建立成功则分配相应资源,并返回针对客户端请求的确认报文,其中响应报文头部参数包括:连接建立标志位SYN、Server端针对该通信过程的随机Seq、针对该请求的确认号ack、可附加接收窗口大小信息等。
  • 第三次握手。客户端收到服务端的确认报文后,会再发送一个对该确认报文的确认。之所以要让客户端再次确认,是为了避免无效的连接请求占用宝贵的服务器资源。
    若以上三次握手都没问题则连接建立,在第三次握手的时候即可开始传送数据。

2.linux中tcp协议入口函数(inet_init()函数)

在TCP协议栈中,入口函数是inet_init()函数(带有__init标记),如下所示,大概流程如下:arp_init()->ip_init()->tcp_v4_init()->tcp_init()->udp_init()->udplite4_register()->ping_init()。

/*
 * inet_init - one-time initialisation of the IPv4 protocol family.
 *
 * Sequence, in order:
 *   1. allocate the local reserved-ports buffer (65536 / 8 bytes);
 *   2. register the tcp, udp, raw and ping proto structures;
 *   3. register the inet address family with the socket layer;
 *   4. add the ICMP/UDP/TCP (and optionally IGMP) protocol handlers;
 *   5. initialise the inetsw lists and register every inetsw_array entry;
 *   6. bring up ARP, IP, TCP, UDP, UDP-Lite, ping, ICMP, the optional
 *      multicast router, the MIBs, /proc entries and fragment handling;
 *   7. install the IP packet type handler.
 *
 * Returns 0 on success.  If the reserved-ports allocation fails the
 * initial value of rc (-EINVAL) is returned; if a proto_register() call
 * fails its error code is returned after the earlier registrations and
 * the reserved-ports buffer have been released.
 */
static int __init inet_init(void)
{
    unsigned int idx;
    int rc = -EINVAL;

    /* The per-skb inet control data must fit into sk_buff's cb area. */
    BUILD_BUG_ON(sizeof(struct inet_skb_parm) > FIELD_SIZEOF(struct sk_buff, cb));

    sysctl_local_reserved_ports = kzalloc(65536 / 8, GFP_KERNEL);
    /* NOTE(review): this failure is reported as -EINVAL, not -ENOMEM. */
    if (sysctl_local_reserved_ports == NULL)
        return rc;

    /* Register the transport protos; each failure unwinds the previous ones. */
    rc = proto_register(&tcp_prot, 1);
    if (rc != 0)
        goto err_free_ports;

    rc = proto_register(&udp_prot, 1);
    if (rc != 0)
        goto err_drop_tcp;

    rc = proto_register(&raw_prot, 1);
    if (rc != 0)
        goto err_drop_udp;

    rc = proto_register(&ping_prot, 1);
    if (rc != 0)
        goto err_drop_raw;

    /* Announce the address family to the socket layer (result ignored). */
    (void)sock_register(&inet_family_ops);

#ifdef CONFIG_SYSCTL
    ip_static_sysctl_init();
#endif

    tcp_prot.sysctl_mem = init_net.ipv4.sysctl_tcp_mem;

    /* Hook up the base protocols; a failure is only logged. */
    if (inet_add_protocol(&icmp_protocol, IPPROTO_ICMP) < 0)
        pr_crit("%s: Cannot add ICMP protocol\n", __func__);
    if (inet_add_protocol(&udp_protocol, IPPROTO_UDP) < 0)
        pr_crit("%s: Cannot add UDP protocol\n", __func__);
    if (inet_add_protocol(&tcp_protocol, IPPROTO_TCP) < 0)
        pr_crit("%s: Cannot add TCP protocol\n", __func__);
#ifdef CONFIG_IP_MULTICAST
    if (inet_add_protocol(&igmp_protocol, IPPROTO_IGMP) < 0)
        pr_crit("%s: Cannot add IGMP protocol\n", __func__);
#endif

    /* Socket-side tables used by inet_create: empty lists first ... */
    for (idx = 0; idx < SOCK_MAX; idx++)
        INIT_LIST_HEAD(&inetsw[idx]);

    /* ... then one registration per built-in protosw entry. */
    for (idx = 0; idx < INETSW_ARRAY_LEN; idx++)
        inet_register_protosw(&inetsw_array[idx]);

    /* ARP module */
    arp_init();

    /* IP module */
    ip_init();

    tcp_v4_init();

    /* TCP slab cache for open requests */
    tcp_init();

    /* UDP memory threshold */
    udp_init();

    /* UDP-Lite (RFC 3828) */
    udplite4_register();

    ping_init();

    /* ICMP layer: failure here is fatal. */
    if (icmp_init() < 0)
        panic("Failed to create the ICMP control socket.\n");

    /* Multicast router */
#if defined(CONFIG_IP_MROUTE)
    if (ip_mr_init() != 0)
        pr_crit("%s: Cannot init ipv4 mroute\n", __func__);
#endif

    /* Per-cpu ipv4 mibs */
    if (init_ipv4_mibs() != 0)
        pr_crit("%s: Cannot init ipv4 mibs\n", __func__);

    ipv4_proc_init();

    ipfrag_init();

    dev_add_pack(&ip_packet_type);

    return 0;

    /* Error unwinding: release in reverse order of acquisition. */
err_drop_raw:
    proto_unregister(&raw_prot);
err_drop_udp:
    proto_unregister(&udp_prot);
err_drop_tcp:
    proto_unregister(&tcp_prot);
err_free_ports:
    kfree(sysctl_local_reserved_ports);
    return rc;
}

fs_initcall(inet_init);

3.linux中接收数据(tcp_v4_rcv函数)

在TCP协议栈中,接收数据是由tcp_v4_rcv函数实现的,TCP数据包被封装在sk_buff skb中。该函数根据报文的地址和端口号在hash表中查找匹配的socket。匹配成功后,便将数据交付给tcp_v4_do_rcv函数,在该函数中检查socket的状态,若该socket的状态是TCP_ESTABLISHED,数据就被传送到tcp_rcv_established函数中,随后被放入接收队列,等待copy到用户空间。

 

/*
 * tcp_v4_rcv - receive path for one TCP segment carried in @skb.
 *
 * The beginning of the function is elided in this excerpt ("..."), so
 * the declarations of th, iph, sk, net, ret and refcounted, as well as
 * the "lookup" label jumped to below, are not visible here.
 *
 * Visible flow: fill the skb control block from the TCP header, then
 * dispatch on the state of the matched socket (TIME_WAIT, NEW_SYN_RECV,
 * LISTEN, or anything else), and finally either process the segment
 * directly, queue it, or discard it.
 *
 * Returns ret from tcp_v4_do_rcv() on the normal path, 0 when the
 * segment is handed to a child socket or discarded.
 */
int tcp_v4_rcv(struct sk_buff *skb)
{
  ...

    /* Sequence number from the header, converted to host byte order. */
    TCP_SKB_CB(skb)->seq = ntohl(th->seq);

    /* End sequence: start + SYN flag + FIN flag + (skb length minus
     * doff * 4, the header length taken from the data-offset field).
     */
    TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
                    skb->len - th->doff * 4);

    TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);

    TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
    TCP_SKB_CB(skb)->tcp_tw_isn = 0;

    TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
    TCP_SKB_CB(skb)->sacked     = 0;
   
process:
    /* TIME_WAIT sockets are handled separately at do_time_wait. */
    if (sk->sk_state == TCP_TIME_WAIT)
        goto do_time_wait;

    /* sk is a pending connection request: work on its listener instead. */
    if (sk->sk_state == TCP_NEW_SYN_RECV) {
        struct request_sock *req = inet_reqsk(sk);
        struct sock *nsk;


        sk = req->rsk_listener;
        /* MD5 check against the listener failed: count the drop, release req. */
        if (unlikely(tcp_v4_inbound_md5_hash(sk, skb))) {
            sk_drops_add(sk, skb);
            reqsk_put(req);
            goto discard_it;
        }


        /* Listener is no longer listening: drop the request and redo
         * the socket lookup (label is in the elided part).
         */
        if (unlikely(sk->sk_state != TCP_LISTEN)) {

            inet_csk_reqsk_queue_drop_and_put(sk, req);


            goto lookup;
        }
        /* We own a reference on the listener, increase it again
         * as we might lose it too soon.
         */
        sock_hold(sk);
        refcounted = true;


        nsk = tcp_check_req(sk, skb, req, false);


        /* No socket came back: release the request and discard. */
        if (!nsk) {
            reqsk_put(req);
            goto discard_and_relse;
        }


        /* Listener itself returned: release req and fall through to the
         * common processing below with sk still being the listener.
         */
        if (nsk == sk) {
            reqsk_put(req);
        } 

        /* A different socket was returned: hand the segment to it; a
         * non-zero result triggers a reset, otherwise we are done.
         */
        else if (tcp_child_process(sk, nsk, skb)) {
         
            tcp_v4_send_reset(nsk, skb);
            goto discard_and_relse;
        } else {
            sock_put(sk);
            return 0;
        }
    }

   
    /* Drop segments whose IP TTL is below the socket's configured minimum. */
    if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
        __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
        goto discard_and_relse;
    }

    if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
        goto discard_and_relse;

    if (tcp_v4_inbound_md5_hash(sk, skb))
        goto discard_and_relse;


    nf_reset(skb);


    if (tcp_filter(sk, skb))
        goto discard_and_relse;


    /* Header pointers are re-read after tcp_filter().
     * NOTE(review): presumably because the filter may change the skb
     * data - confirm against tcp_filter().
     */
    th = (const struct tcphdr *)skb->data;
    iph = ip_hdr(skb);


    skb->dev = NULL;


    /* Listening sockets are processed right away, before the socket
     * lock below is taken.
     */
    if (sk->sk_state == TCP_LISTEN) {
        ret = tcp_v4_do_rcv(sk, skb);
        goto put_and_return;
    }

 


    sk_incoming_cpu_update(sk);

    bh_lock_sock_nested(sk);

    tcp_segs_in(tcp_sk(sk), skb);
    ret = 0;


    /* Socket not owned by a user context: try the prequeue first and
     * process directly only if tcp_prequeue() returns zero.
     */
    if (!sock_owned_by_user(sk)) {

        if (!tcp_prequeue(sk, skb))

            ret = tcp_v4_do_rcv(sk, skb);
    } 

    /* Socket is owned by a user context: put the segment on the backlog.
     * NOTE(review): the failure branch leaves without bh_unlock_sock();
     * presumably tcp_add_backlog() unlocks on failure - confirm.
     */
    else if (tcp_add_backlog(sk, skb)) {
        goto discard_and_relse;
    }
    bh_unlock_sock(sk);

put_and_return:

    /* Drop the extra reference only if one was taken on this path. */
    if (refcounted)
        sock_put(sk);

    return ret;

    /* NOTE(review): no goto to no_tcp_socket or bad_packet is visible
     * in this excerpt; presumably they are reached from the elided part.
     */
no_tcp_socket:
    if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
        goto discard_it;

    /* Bad checksum: count both error counters (csum_error falls through
     * into bad_packet); otherwise answer with a reset.
     */
    if (tcp_checksum_complete(skb)) {
csum_error:
        __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
bad_packet:
        __TCP_INC_STATS(net, TCP_MIB_INERRS);
    } else {

        tcp_v4_send_reset(NULL, skb);
    }

discard_it:
    /* Discard frame. */
    kfree_skb(skb);
    return 0;

discard_and_relse:
    /* Account the drop on sk, release our reference if held, then free. */
    sk_drops_add(sk, skb);
    if (refcounted)
        sock_put(sk);
    goto discard_it;

do_time_wait:
    /* From here on sk is accessed as a timewait socket via inet_twsk();
     * inet_twsk_put() is called on it before each early exit.
     */
    if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
        inet_twsk_put(inet_twsk(sk));
        goto discard_it;
    }


    if (tcp_checksum_complete(skb)) {
        inet_twsk_put(inet_twsk(sk));
        goto csum_error;
    }

    switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {


    /* SYN result: look for a listener for this segment. */
    case TCP_TW_SYN: {

        struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
                            &tcp_hashinfo, skb,
                            __tcp_hdrlen(th),
                            iph->saddr, th->source,
                            iph->daddr, th->dest,
                            inet_iif(skb));

        /* Listener found: retire the timewait socket and restart
         * processing with the listener, without a held reference.
         */
        if (sk2) {

            inet_twsk_deschedule_put(inet_twsk(sk));

            sk = sk2;
            refcounted = false;

      
            goto process;
        }
        /* Fall through to ACK */
    }


    case TCP_TW_ACK:
        tcp_v4_timewait_ack(sk, skb);
        break;

    case TCP_TW_RST:
        tcp_v4_send_reset(sk, skb);

        inet_twsk_deschedule_put(inet_twsk(sk));
        goto discard_it;

    /* Nothing more to do for this result. */
    case TCP_TW_SUCCESS:;
    }
    goto discard_it;
}

4.连接请求的应答报文(tcp_v4_send_synack函数)

以服务端响应连接请求为例,即文章开头第二次握手中服务端返回的SYN+ACK确认报文。该函数主要是查询路由表,并构造TCP数据报文,然后发送出去。在取得到目标地址的路由之后,就使用tcp_make_synack构造SYN+ACK报文;构造报文时,要按照TCP协议的格式填充首部并计算校验和。构造完成之后,就通过ip_build_and_send_pkt发送出去。

  /*
   * tcp_v4_send_synack - build and transmit a SYN-ACK for @req.
   *
   * @sk:  socket the request belongs to
   * @req: the open request being answered
   * @dst: route to use; when NULL one is obtained via tcp_v4_route_req()
   *
   * Returns -1 when no route or no packet could be obtained, otherwise
   * the result of ip_build_and_send_pkt() with NET_XMIT_CN mapped to 0.
   * dst_release() is called on @dst on every path, including when it
   * is still NULL.
   */
  static int tcp_v4_send_synack(struct sock *sk, struct open_request *req,
                    struct dst_entry *dst)
  {
      struct sk_buff *synack;
      struct tcphdr *hdr;
      int rc = -1;

      /* A route is required; look one up only if the caller gave none. */
      if (dst == NULL) {
          dst = tcp_v4_route_req(sk, req);
          if (dst == NULL)
              goto release;
      }

      synack = tcp_make_synack(sk, dst, req);
      if (synack == NULL)
          goto release;

      /* Fill in the TCP checksum over the local/remote address pair. */
      hdr = synack->h.th;
      hdr->check = tcp_v4_check(hdr, synack->len,
                                req->af.v4_req.loc_addr,
                                req->af.v4_req.rmt_addr,
                                csum_partial((char *)hdr, synack->len,
                                             synack->csum));

      rc = ip_build_and_send_pkt(synack, sk, req->af.v4_req.loc_addr,
                                 req->af.v4_req.rmt_addr,
                                 req->af.v4_req.opt);
      /* NET_XMIT_CN is reported to the caller as success. */
      if (rc == NET_XMIT_CN)
          rc = 0;

  release:
      dst_release(dst);
      return rc;
  }

5.tcp状态处理函数(tcp_rcv_synsent_state_process函数)

该函数主要根据连接所处的状态,对socket和收到的数据包做出相应的处理。

  static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
                       struct tcphdr *th, unsigned len)
    {
     if (th->ack) {
         /* rfc3
          * "If the state is SYN-SENT then
          *    first check the ACK bit
          *      If the ACK bit is set
          *    If SEG.ACK =< ISS, or SEG.ACK > SND.NXT, send
          *        a reset (unless the RST bit is set, if so drop
          *        the segment and return)"
          *
          *  We do not send data with SYN, so that RFC-correct
          *  test reduces to
          */
         if (TCP_SKB_CB(skb)->ack_seq != tp->snd_nxt)
             goto reset_and_undo;
         if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
             !between(tp->rx_opt.rcv_tsecr, tp->retrans_stamp,
                  tcp_time_stamp)) {
             NET_INC_STATS_BH(LINUX_MIB_PAWSACTIVEREJECTED);
             goto reset_and_undo;
         }
         /* Now ACK is acceptable.
          *
          * "If the RST bit is set
          *    If the ACK was acceptable then signal the user "error
          *    connection reset", drop the segment, enter CLOSED state,
          *    delete TCB, and return."
          */
         if (th->rst) {
             tcp_reset(sk);
             goto discard;
         }
         /* rfc
          *   "fifth, if neither of the SYN or RST bits is set then
          *    drop the segment and return."
          *
          *    See note below!
          *                                        --ANK()
          */
         if (!th->syn)
             goto discard_and_undo;
   discard:
            __kfree_skb(skb);
            return ;
        } else {/*tcp_send_ack();
    alloc_skb();
    tcp_transmit_skb();
            tcp_send_ack(sk);
        }
        return -1;
    }

    if (th->syn) {
        /* We see SYN without ACK. It is attempt of
         * simultaneous connect with crossed SYNs.
         * Particularly, it can be connect to self.
         */
       tcp_set_state(sk, TCP_SYN_RECV);

 

 

深入理解TCP协议及其源代码

原文:https://www.cnblogs.com/zwjsec/p/12103581.html

(0)
(0)
   
举报
评论 一句话评论(0
关于我们 - 联系我们 - 留言反馈 - 联系我们:wmxa8@hotmail.com
© 2014 bubuko.com 版权所有
打开技术之扣,分享程序人生!