1: int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) 2: { 3: ..... 4: /* 设置目的地址和目标端口 */ 5: inet->dport = usin->sin_port; 6: inet->daddr = daddr; 7: .... 8: /* 初始化MSS上限 */ 9: tp->rx_opt.mss_clamp = 536; 10: 11: /* Socket identity is still unknown (sport may be zero). 12: * However we set state to SYN-SENT and not releasing socket 13: * lock select source port, enter ourselves into the hash tables and 14: * complete initialization after this. 15: */ 16: tcp_set_state(sk, TCP_SYN_SENT);/* 设置状态 */ 17: err = tcp_v4_hash_connect(sk);/* 将传输控制添加到ehash散列表中,并动态分配端口 */ 18: if (err) 19: goto failure; 20: .... 21: if (!tp->write_seq)/* 还未计算初始序号 */ 22: /* 根据双方地址、端口计算初始序号 */ 23: tp->write_seq = secure_tcp_sequence_number(inet->saddr, 24: inet->daddr, 25: inet->sport, 26: usin->sin_port); 27: 28: /* 根据初始序号和当前时间,随机算一个初始id */ 29: inet->id = tp->write_seq ^ jiffies; 30: 31: /* 发送SYN段 */ 32: err = tcp_connect(sk); 33: rt = NULL; 34: if (err) 35: goto failure; 36: 37: return 0; 38: }
1: asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr, int __user *upeer_addrlen) 2: { 3: struct socket *sock, *newsock; 4: ..... 5: sock = sockfd_lookup(fd, &err);/* 获得侦听端口的socket */ 6: ..... 7: if (!(newsock = sock_alloc()))/* 分配一个新的套接口,用来处理与客户端的连接 */ 8: ..... 9: /* 调用传输层的accept,对TCP来说,是inet_accept */ 10: err = sock->ops->accept(sock, newsock, sock->file->f_flags); 11: .... 12: if (upeer_sockaddr) {/* 调用者需要获取对方套接口地址和端口 */ 13: /* 调用传输层回调获得对方的地址和端口 */ 14: if(newsock->ops->getname(newsock, (struct sockaddr *)address, &len, 2)<0) { 15: } 16: /* 成功后复制到用户态 */ 17: err = move_addr_to_user(address, len, upeer_sockaddr, upeer_addrlen); 18: } 19: ..... 20: if ((err = sock_map_fd(newsock)) < 0)/* 为新连接分配文件描述符 */ 21: 22: return err; 23: }
[注]: 在内核2.6.32以后对应函数为inet_csk_accept().
1: struct sock *tcp_accept(struct sock *sk, int flags, int *err) 2: { 3: .... 4: /* Find already established connection */ 5: if (!tp->accept_queue) {/* accept队列为空,说明还没有收到新连接 */ 6: long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);/* 如果套口是非阻塞的,或者在一定时间内没有新连接,则返回 */ 7: 8: if (!timeo)/* 超时时间到,没有新连接,退出 */ 9: goto out; 10: 11: /* 运行到这里,说明有新连接到来,则等待新的传输控制块 */ 12: error = wait_for_connect(sk, timeo); 13: if (error) 14: goto out; 15: } 16: 17: req = tp->accept_queue; 18: if ((tp->accept_queue = req->dl_next) == NULL) 19: tp->accept_queue_tail = NULL; 20: 21: newsk = req->sk; 22: sk_acceptq_removed(sk); 23: tcp_openreq_fastfree(req); 24: .... 25: 26: return newsk; 27: }
1: /* 构造并发送SYN段 */ 2: int tcp_connect(struct sock *sk) 3: { 4: struct tcp_sock *tp = tcp_sk(sk); 5: struct sk_buff *buff; 6: 7: tcp_connect_init(sk);/* 初始化传输控制块中与连接相关的成员 */ 8: 9: /* 为SYN段分配报文并进行初始化 */ 10: buff = alloc_skb(MAX_TCP_HEADER + 15, sk->sk_allocation); 11: if (unlikely(buff == NULL)) 12: return -ENOBUFS; 13: 14: /* Reserve space for headers. */ 15: skb_reserve(buff, MAX_TCP_HEADER); 16: 17: TCP_SKB_CB(buff)->flags = TCPCB_FLAG_SYN; 18: TCP_ECN_send_syn(sk, tp, buff); 19: TCP_SKB_CB(buff)->sacked = 0; 20: skb_shinfo(buff)->tso_segs = 1; 21: skb_shinfo(buff)->tso_size = 0; 22: buff->csum = 0; 23: TCP_SKB_CB(buff)->seq = tp->write_seq++; 24: TCP_SKB_CB(buff)->end_seq = tp->write_seq; 25: tp->snd_nxt = tp->write_seq; 26: tp->pushed_seq = tp->write_seq; 27: tcp_ca_init(tp); 28: 29: /* Send it off. */ 30: TCP_SKB_CB(buff)->when = tcp_time_stamp; 31: tp->retrans_stamp = TCP_SKB_CB(buff)->when; 32: 33: /* 将报文添加到发送队列上 */ 34: __skb_queue_tail(&sk->sk_write_queue, buff); 35: sk_charge_skb(sk, buff); 36: tp->packets_out += tcp_skb_pcount(buff); 37: /* 发送SYN段 */ 38: tcp_transmit_skb(sk, skb_clone(buff, GFP_KERNEL)); 39: TCP_INC_STATS(TCP_MIB_ACTIVEOPENS); 40: 41: /* Timer for repeating the SYN until an answer. */ 42: /* 启动重传定时器 */ 43: tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto); 44: return 0; 45: } 46:
图: 服务端接收到SYN段后,发送SYN/ACK处理流程。
1: /* 向客户端发送SYN+ACK报文 */ 2: static int tcp_v4_send_synack(struct sock *sk, struct open_request *req, 3: struct dst_entry *dst) 4: { 5: int err = -1; 6: struct sk_buff * skb; 7: 8: /* First, grab a route. */ 9: /* 查找到客户端的路由 */ 10: if (!dst && (dst = tcp_v4_route_req(sk, req)) == NULL) 11: goto out; 12: 13: /* 根据路由、传输控制块、连接请求块中的构建SYN+ACK段 */ 14: skb = tcp_make_synack(sk, dst, req); 15: 16: if (skb) {/* 生成SYN+ACK段成功 */ 17: struct tcphdr *th = skb->h.th; 18: 19: /* 生成校验码 */ 20: th->check = tcp_v4_check(th, skb->len, 21: req->af.v4_req.loc_addr, 22: req->af.v4_req.rmt_addr, 23: csum_partial((char *)th, skb->len, 24: skb->csum)); 25: 26: /* 生成IP数据报并发送出去 */ 27: err = ip_build_and_send_pkt(skb, sk, req->af.v4_req.loc_addr, 28: req->af.v4_req.rmt_addr, 29: req->af.v4_req.opt); 30: if (err == NET_XMIT_CN) 31: err = 0; 32: } 33: 34: out: 35: dst_release(dst); 36: return err; 37: } 38:
1: /* 在SYN_SENT状态下处理接收到的段,但是不处理带外数据 */ 2: static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, 3: struct tcphdr *th, unsigned len) 4: { 5: struct tcp_sock *tp = tcp_sk(sk); 6: int saved_clamp = tp->rx_opt.mss_clamp; 7: 8: /* 解析TCP选项并保存到传输控制块中 */ 9: tcp_parse_options(skb, &tp->rx_opt, 0); 10: 11: if (th->ack) {/* 处理ACK标志 */ 12: /* rfc793: 13: * "If the state is SYN-SENT then 14: * first check the ACK bit 15: * If the ACK bit is set 16: * If SEG.ACK =< ISS, or SEG.ACK > SND.NXT, send 17: * a reset (unless the RST bit is set, if so drop 18: * the segment and return)" 19: * 20: * We do not send data with SYN, so that RFC-correct 21: * test reduces to: 22: */ 23: if (TCP_SKB_CB(skb)->ack_seq != tp->snd_nxt) 24: goto reset_and_undo; 25: 26: if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr && 27: !between(tp->rx_opt.rcv_tsecr, tp->retrans_stamp, 28: tcp_time_stamp)) { 29: NET_INC_STATS_BH(LINUX_MIB_PAWSACTIVEREJECTED); 30: goto reset_and_undo; 31: } 32: 33: /* Now ACK is acceptable. 34: * 35: * "If the RST bit is set 36: * If the ACK was acceptable then signal the user "error: 37: * connection reset", drop the segment, enter CLOSED state, 38: * delete TCB, and return." 39: */ 40: 41: if (th->rst) {/* 收到ACK+RST段,需要tcp_reset设置错误码,并关闭套接口 */ 42: tcp_reset(sk); 43: goto discard; 44: } 45: 46: /* rfc793: 47: * "fifth, if neither of the SYN or RST bits is set then 48: * drop the segment and return." 49: * 50: * See note below! 51: * --ANK(990513) 52: */ 53: if (!th->syn)/* 在SYN_SENT状态下接收到的段必须存在SYN标志,否则说明接收到的段无效,丢弃该段 */ 54: goto discard_and_undo; 55: 56: /* rfc793: 57: * "If the SYN bit is on ... 58: * are acceptable then ... 59: * (our SYN has been ACKed), change the connection 60: * state to ESTABLISHED..." 61: */ 62: 63: /* 从首部标志中获取显示拥塞通知的特性 */ 64: TCP_ECN_rcv_synack(tp, th); 65: if (tp->ecn_flags&TCP_ECN_OK)/* 如果支持ECN,则设置标志 */ 66: sk->sk_no_largesend = 1; 67: 68: /* 设置与窗口相关的成员变量 */ 69: tp->snd_wl1 = TCP_SKB_CB(skb)->seq; 70: tcp_ack(sk, skb, FLAG_SLOWPATH); 71: 72: /* Ok.. it‘s good. Set up sequence numbers and 73: * move to established. 74: */ 75: tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1; 76: tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1; 77: 78: /* RFC1323: The window in SYN & SYN/ACK segments is 79: * never scaled. 80: */ 81: tp->snd_wnd = ntohs(th->window); 82: tcp_init_wl(tp, TCP_SKB_CB(skb)->ack_seq, TCP_SKB_CB(skb)->seq); 83: 84: if (!tp->rx_opt.wscale_ok) { 85: tp->rx_opt.snd_wscale = tp->rx_opt.rcv_wscale = 0; 86: tp->window_clamp = min(tp->window_clamp, 65535U); 87: } 88: 89: if (tp->rx_opt.saw_tstamp) {/* 根据是否支持时间戳选项来设置传输控制块的相关字段 */ 90: tp->rx_opt.tstamp_ok = 1; 91: tp->tcp_header_len = 92: sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED; 93: tp->advmss -= TCPOLEN_TSTAMP_ALIGNED; 94: tcp_store_ts_recent(tp); 95: } else { 96: tp->tcp_header_len = sizeof(struct tcphdr); 97: } 98: 99: /* 初始化PMTU、MSS等成员变量 */ 100: if (tp->rx_opt.sack_ok && sysctl_tcp_fack) 101: tp->rx_opt.sack_ok |= 2; 102: 103: tcp_sync_mss(sk, tp->pmtu_cookie); 104: tcp_initialize_rcv_mss(sk); 105: 106: /* Remember, tcp_poll() does not lock socket! 107: * Change state from SYN-SENT only after copied_seq 108: * is initialized. */ 109: tp->copied_seq = tp->rcv_nxt; 110: mb(); 111: tcp_set_state(sk, TCP_ESTABLISHED); 112: 113: /* Make sure socket is routed, for correct metrics. */ 114: tp->af_specific->rebuild_header(sk); 115: 116: tcp_init_metrics(sk); 117: 118: /* Prevent spurious tcp_cwnd_restart() on first data 119: * packet. 120: */ 121: tp->lsndtime = tcp_time_stamp; 122: 123: tcp_init_buffer_space(sk); 124: 125: /* 如果启用了连接保活,则启用连接保活定时器 */ 126: if (sock_flag(sk, SOCK_KEEPOPEN)) 127: tcp_reset_keepalive_timer(sk, keepalive_time_when(tp)); 128: 129: if (!tp->rx_opt.snd_wscale)/* 首部预测 */ 130: __tcp_fast_path_on(tp, tp->snd_wnd); 131: else 132: tp->pred_flags = 0; 133: 134: if (!sock_flag(sk, SOCK_DEAD)) {/* 如果套口不处于SOCK_DEAD状态,则唤醒等待该套接口的进程 */ 135: sk->sk_state_change(sk); 136: sk_wake_async(sk, 0, POLL_OUT); 137: } 138: 139: /* 连接建立完成,根据情况进入延时确认模式 */ 140: if (sk->sk_write_pending || tp->defer_accept || tp->ack.pingpong) { 141: /* Save one ACK. Data will be ready after 142: * several ticks, if write_pending is set. 143: * 144: * It may be deleted, but with this feature tcpdumps 145: * look so _wonderfully_ clever, that I was not able 146: * to stand against the temptation 8) --ANK 147: */ 148: tcp_schedule_ack(tp); 149: tp->ack.lrcvtime = tcp_time_stamp; 150: tp->ack.ato = TCP_ATO_MIN; 151: tcp_incr_quickack(tp); 152: tcp_enter_quickack_mode(tp); 153: tcp_reset_xmit_timer(sk, TCP_TIME_DACK, TCP_DELACK_MAX); 154: 155: discard: 156: __kfree_skb(skb); 157: return 0; 158: } else {/* 不需要延时确认,立即发送ACK段 */ 159: tcp_send_ack(sk); 160: } 161: return -1; 162: } 163: 164: /* No ACK in the segment */ 165: 166: if (th->rst) {/* 收到RST段,则丢弃传输控制块 */ 167: /* rfc793: 168: * "If the RST bit is set 169: * 170: * Otherwise (no ACK) drop the segment and return." 171: */ 172: 173: goto discard_and_undo; 174: } 175: 176: /* PAWS check. */ 177: /* PAWS检测失效,也丢弃传输控制块 */ 178: if (tp->rx_opt.ts_recent_stamp && tp->rx_opt.saw_tstamp && tcp_paws_check(&tp->rx_opt, 0)) 179: goto discard_and_undo; 180: 181: /* 在SYN_SENT状态下收到了SYN段并且没有ACK,说明是两端同时打开 */ 182: if (th->syn) { 183: /* We see SYN without ACK. It is attempt of 184: * simultaneous connect with crossed SYNs. 185: * Particularly, it can be connect to self. 186: */ 187: tcp_set_state(sk, TCP_SYN_RECV);/* 设置状态为TCP_SYN_RECV */ 188: 189: if (tp->rx_opt.saw_tstamp) {/* 设置时间戳相关的字段 */ 190: tp->rx_opt.tstamp_ok = 1; 191: tcp_store_ts_recent(tp); 192: tp->tcp_header_len = 193: sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED; 194: } else { 195: tp->tcp_header_len = sizeof(struct tcphdr); 196: } 197: 198: /* 初始化窗口相关的成员变量 */ 199: tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1; 200: tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1; 201: 202: /* RFC1323: The window in SYN & SYN/ACK segments is 203: * never scaled. 204: */ 205: tp->snd_wnd = ntohs(th->window); 206: tp->snd_wl1 = TCP_SKB_CB(skb)->seq; 207: tp->max_window = tp->snd_wnd; 208: 209: TCP_ECN_rcv_syn(tp, th);/* 从首部标志中获取显式拥塞通知的特性。 */ 210: if (tp->ecn_flags&TCP_ECN_OK) 211: sk->sk_no_largesend = 1; 212: 213: /* 初始化MSS相关的成员变量 */ 214: tcp_sync_mss(sk, tp->pmtu_cookie); 215: tcp_initialize_rcv_mss(sk); 216: 217: /* 向对端发送SYN+ACK段,并丢弃接收到的SYN段 */ 218: tcp_send_synack(sk); 219: #if 0 220: /* Note, we could accept data and URG from this segment. 221: * There are no obstacles to make this. 222: * 223: * However, if we ignore data in ACKless segments sometimes, 224: * we have no reasons to accept it sometimes. 225: * Also, seems the code doing it in step6 of tcp_rcv_state_process 226: * is not flawless. So, discard packet for sanity. 227: * Uncomment this return to process the data. 228: */ 229: return -1; 230: #else 231: goto discard; 232: #endif 233: } 234: /* "fifth, if neither of the SYN or RST bits is set then 235: * drop the segment and return." 236: */ 237: 238: discard_and_undo: 239: tcp_clear_options(&tp->rx_opt); 240: tp->rx_opt.mss_clamp = saved_clamp; 241: goto discard; 242: 243: reset_and_undo: 244: tcp_clear_options(&tp->rx_opt); 245: tp->rx_opt.mss_clamp = saved_clamp; 246: return 1; 247: } 248:
1: /* 除了ESTABLISHED和TIME_WAIT状态外,其他状态下的TCP段处理都由本函数实现 */ 2: int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, 3: struct tcphdr *th, unsigned len) 4: { 5: struct tcp_sock *tp = tcp_sk(sk); 6: int queued = 0; 7: 8: tp->rx_opt.saw_tstamp = 0; 9: 10: switch (sk->sk_state) { 11: ..... 12: /* SYN_RECV状态的处理 */ 13: if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp &&/* 解析TCP选项,如果首部中存在时间戳选项 */ 14: tcp_paws_discard(tp, skb)) {/* PAWS检测失败,则丢弃报文 */ 15: if (!th->rst) {/* 如果不是RST段 */ 16: /* 发送DACK给对端,说明接收到的TCP段已经处理过 */ 17: NET_INC_STATS_BH(LINUX_MIB_PAWSESTABREJECTED); 18: tcp_send_dupack(sk, skb); 19: goto discard; 20: } 21: /* Reset is accepted even if it did not pass PAWS. */ 22: } 23: 24: /* step 1: check sequence number */ 25: if (!tcp_sequence(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq)) {/* TCP段序号无效 */ 26: if (!th->rst)/* 如果TCP段无RST标志,则发送DACK给对方 */ 27: tcp_send_dupack(sk, skb); 28: goto discard; 29: } 30: 31: /* step 2: check RST bit */ 32: if(th->rst) {/* 如果有RST标志,则重置连接 */ 33: tcp_reset(sk); 34: goto discard; 35: } 36: 37: /* 如果有必要,则更新时间戳 */ 38: tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq); 39: 40: /* step 3: check security and precedence [ignored] */ 41: 42: /* step 4: 43: * 44: * Check for a SYN in window. 45: */ 46: if (th->syn && !before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {/* 如果有SYN标志并且序号在接收窗口内 */ 47: NET_INC_STATS_BH(LINUX_MIB_TCPABORTONSYN); 48: tcp_reset(sk);/* 复位连接 */ 49: return 1; 50: } 51: 52: /* step 5: check the ACK field */ 53: if (th->ack) {/* 如果有ACK标志 */ 54: /* 检查ACK是否为正常的第三次握手 */ 55: int acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH); 56: 57: switch(sk->sk_state) { 58: case TCP_SYN_RECV: 59: if (acceptable) { 60: tp->copied_seq = tp->rcv_nxt; 61: mb(); 62: /* 正常的第三次握手,设置连接状态为TCP_ESTABLISHED */ 63: tcp_set_state(sk, TCP_ESTABLISHED); 64: sk->sk_state_change(sk); 65: 66: /* Note, that this wakeup is only for marginal 67: * crossed SYN case. Passively open sockets 68: * are not waked up, because sk->sk_sleep == 69: * NULL and sk->sk_socket == NULL. 70: */ 71: if (sk->sk_socket) {/* 状态已经正常,唤醒那些等待的线程 */ 72: sk_wake_async(sk,0,POLL_OUT); 73: } 74: 75: /* 初始化传输控制块,如果存在时间戳选项,同时平滑RTT为0,则需计算重传超时时间 */ 76: tp->snd_una = TCP_SKB_CB(skb)->ack_seq; 77: tp->snd_wnd = ntohs(th->window) << 78: tp->rx_opt.snd_wscale; 79: tcp_init_wl(tp, TCP_SKB_CB(skb)->ack_seq, 80: TCP_SKB_CB(skb)->seq); 81: 82: /* tcp_ack considers this ACK as duplicate 83: * and does not calculate rtt. 84: * Fix it at least with timestamps. 85: */ 86: if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr && 87: !tp->srtt) 88: tcp_ack_saw_tstamp(tp, 0); 89: 90: if (tp->rx_opt.tstamp_ok) 91: tp->advmss -= TCPOLEN_TSTAMP_ALIGNED; 92: 93: /* Make sure socket is routed, for 94: * correct metrics. 95: */ 96: /* 建立路由,初始化拥塞控制模块 */ 97: tp->af_specific->rebuild_header(sk); 98: 99: tcp_init_metrics(sk); 100: 101: /* Prevent spurious tcp_cwnd_restart() on 102: * first data packet. 103: */ 104: tp->lsndtime = tcp_time_stamp;/* 更新最近一次发送数据包的时间 */ 105: 106: tcp_initialize_rcv_mss(sk); 107: tcp_init_buffer_space(sk); 108: tcp_fast_path_on(tp);/* 计算有关TCP首部预测的标志 */ 109: } else { 110: return 1; 111: } 112: break; 113: ..... 114: } 115: } else 116: goto discard; 117: ..... 118: 119: /* step 6: check the URG bit */ 120: tcp_urg(sk, skb, th);/* 检测带外数据位 */ 121: 122: /* tcp_data could move socket to TIME-WAIT */ 123: if (sk->sk_state != TCP_CLOSE) {/* 如果tcp_data需要发送数据和ACK则在这里处理 */ 124: tcp_data_snd_check(sk); 125: tcp_ack_snd_check(sk); 126: } 127: 128: if (!queued) { /* 如果段没有加入队列,或者前面的流程需要释放报文,则释放它 */ 129: discard: 130: __kfree_skb(skb); 131: } 132: return 0; 133: }
原文:http://www.cnblogs.com/mosp/p/3891783.html