TCP client: receiving the SYN+ACK, and simultaneous connect

tcp_v4_rcv
    |--> tcp_v4_do_rcv
            |-> tcp_rcv_state_process
                    |-> tcp_rcv_synsent_state_process

1. If the state is ESTABLISHED, the segment is handled by tcp_rcv_established().
2. If the state is LISTEN, the sock is listening; this is the receive path of a passive open, handling both the SYN and the ACK.
3. If the state is neither ESTABLISHED nor TIME_WAIT, the segment is handled by tcp_rcv_state_process().

/* The socket must have it's spinlock held when we get
 * here.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 */
/*
 * After the TCP layer has received a segment, performed the basic validation
 * and located the transport control block that should handle it, it calls
 * tcp_v4_do_rcv() to do the actual processing, unless the sock is in the
 * FIN_WAIT_2 or TIME_WAIT state.
 */
int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
{
struct sock *rsk;
#ifdef CONFIG_TCP_MD5SIG
/*
* We really want to reject the packet as early as possible
* if:
* o We're expecting an MD5'd packet and this is no MD5 tcp option
* o There is an MD5 option and we're not expecting one
*/
if (tcp_v4_inbound_md5_hash(sk, skb))
goto discard;
#endif

if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
    sock_rps_save_rxhash(sk, skb->rxhash);
    TCP_CHECK_TIMER(sk);
    if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) {
        rsk = sk;
        goto reset;
    }
    TCP_CHECK_TIMER(sk);
    return 0;
}

if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
    goto csum_err;

if (sk->sk_state == TCP_LISTEN) { /* server side: this is either the handshake's first-step SYN or its third-step ACK */
    struct sock *nsk = tcp_v4_hnd_req(sk, skb);
    if (!nsk)
        goto discard;

    if (nsk != sk) { /* for the first-step SYN, nsk is the 'parent' sk; for the third-step ACK, nsk is the 'child' sk */
        if (tcp_child_process(sk, nsk, skb)) { /* this still ends up calling tcp_rcv_state_process */
            rsk = nsk;
            goto reset;
        }
        return 0; /* for the third step of the handshake we return here */
    } /* for the first-step SYN, continue with the code below */
} else
    sock_rps_save_rxhash(sk, skb->rxhash);

/* Reaching this point means either the client received a SYN+ACK or the server received a SYN */
TCP_CHECK_TIMER(sk);
if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
    rsk = sk;
    goto reset;
}
TCP_CHECK_TIMER(sk);
return 0;

reset:
tcp_v4_send_reset(rsk, skb);
discard:
kfree_skb(skb);
/* Be careful here. If this function gets more complicated and
* gcc suffers from register pressure on the x86, sk (in %ebx)
* might be destroyed here. This current version compiles correctly,
* but you have been warned.
*/
return 0;

csum_err:
TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
goto discard;
}

After the client calls connect(), the sock enters the TCP_SYN_SENT state and is inserted into the ehash; for a blocking socket, connect() then waits for the handshake to complete.
This article looks at how the server's SYN+ACK is received, i.e. the second step of the handshake on the client side.

After the SYN segment is sent, the connection state becomes SYN_SENT. A SYN+ACK segment received in this state is handled by tcp_rcv_state_process().
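
Before walking through the kernel path, this is what it looks like from user space: a minimal sketch of a blocking client (the address 127.0.0.1:8080 is only an example). connect() returns once the SYN+ACK has been processed by the code described below.

#include <arpa/inet.h>
#include <netinet/in.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
    struct sockaddr_in srv;
    int fd = socket(AF_INET, SOCK_STREAM, 0);   /* sock state: TCP_CLOSE */

    memset(&srv, 0, sizeof(srv));
    srv.sin_family = AF_INET;
    srv.sin_port = htons(8080);                 /* example port */
    inet_pton(AF_INET, "127.0.0.1", &srv.sin_addr);

    /* connect() sends the SYN, moves the sock to TCP_SYN_SENT and, because
     * the socket is blocking, sleeps until the SYN+ACK has been handled by
     * tcp_rcv_synsent_state_process() (or until the attempt fails).
     */
    if (connect(fd, (struct sockaddr *)&srv, sizeof(srv)) < 0) {
        perror("connect");
        return 1;
    }
    /* Here the connection is TCP_ESTABLISHED and data can be sent. */
    close(fd);
    return 0;
}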

On the receive path of the protocol stack:

  • tcp_v4_rcv

    • ->__inet_lookup_skb() // find the sk in TCP_SYN_SENT state in the ehash

    • ->!sock_owned_by_user() // connect() does not own the lock, even when it blocks

      • ->!tcp_prequeue() // a SYN+ACK is not queued on the prequeue

      • ->tcp_v4_do_rcv()

        • ->tcp_rcv_state_process() // enter the TCP_SYN_SENT handling logic

          • -> tcp_rcv_synsent_state_process

The complete function first:

int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
{
struct tcp_sock *tp = tcp_sk(sk);
struct inet_connection_sock *icsk = inet_csk(sk);
const struct tcphdr *th = tcp_hdr(skb);
struct request_sock *req;
int queued = 0;
bool acceptable;

switch (sk->sk_state) {
case TCP_CLOSE:
    goto discard;

case TCP_LISTEN:
    /* Server side: a SYN arrives.
     *
     * In the LISTEN state only SYN segments are processed. For an ACK
     * segment the connection has not been set up yet, so return 1; the
     * caller of tcp_rcv_state_process() will then send an RST to the
     * peer. An RST segment is simply dropped.
     */
    if (th->ack)
        return 1;

    if (th->rst)
        goto discard;

    if (th->syn) {
        if (th->fin)
            goto discard;
        /*
         * The SYN is handled by the conn_request hook (tcp_v4_conn_request
         * for TCP over IPv4); the icsk_af_ops member is initialized when the
         * socket is created, see tcp_v4_init_sock().
         * On the first step of the handshake (SYN), tcp_v4_conn_request
         * creates the connection-request control block (request_sock).
         */
        if (icsk->icsk_af_ops->conn_request(sk, skb) < 0) /* ipv4_specific--->tcp_v4_conn_request */
            return 1;

        consume_skb(skb);
        return 0;
    }
    goto discard;

case TCP_SYN_SENT: /* client side: SYN+ACK received */
    /*
     * A sock in TCP_SYN_SENT is handled by tcp_rcv_synsent_state_process():
     *  - parse the TCP options to learn what the server supports, e.g. SACK,
     *    TFO, wscale, MSS, timestamps
     *  - if the segment carries an ACK, run tcp_ack(); with Fast Open this
     *    may acknowledge data sent together with the SYN
     *  - call tcp_finish_connect(): TCP_SYN_SENT -> TCP_ESTABLISHED
     *  - save the Fast Open cookie if one is present
     *  - decide between an immediate ACK and a delayed ACK
     *  - if the segment has no ACK, only a SYN, this is a simultaneous
     *    connect: TCP_SYN_SENT -> TCP_SYN_RECV, and a SYN+ACK is sent
     */
    tp->rx_opt.saw_tstamp = 0;
    queued = tcp_rcv_synsent_state_process(sk, skb, th);
    if (queued >= 0)
        return queued;

    /* Do step6 onward by hand. */
    tcp_urg(sk, skb, th);
    __kfree_skb(skb);
    tcp_data_snd_check(sk);
    return 0;
}

tp->rx_opt.saw_tstamp = 0;
req = tp->fastopen_rsk;
if (req) {
    WARN_ON_ONCE(sk->sk_state != TCP_SYN_RECV &&
        sk->sk_state != TCP_FIN_WAIT1);

    if (!tcp_check_req(sk, skb, req, true))
        goto discard;
}

if (!th->ack && !th->rst && !th->syn)
    goto discard;

if (!tcp_validate_incoming(sk, skb, th, 0))
    return 0;

/*
 * Process the ACK flag. A non-zero return from tcp_ack() means the ACK
 * segment was handled successfully, e.g. the normal third-step ACK of the
 * three-way handshake.
 */
/* step 5: check the ACK field */
acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH |
                              FLAG_UPDATE_TS_RECENT) > 0;
/*
 * In the ACK handling of tcp_rcv_state_process(): if the connection is in
 * FIN_WAIT_1 and all outgoing data has been acknowledged, it moves to
 * FIN_WAIT_2. If there is no need to wait in that state (linger2 < 0), or an
 * out-of-order data segment was received, the connection is closed right
 * away. If waiting is required, the wait time is compared with the TIME_WAIT
 * length: if it is larger than TCP_TIMEWAIT_LEN, a FIN_WAIT_2 timer is added;
 * otherwise the socket is handed over to TIME_WAIT handling immediately (its
 * substate remains FIN_WAIT_2), and the TIME_WAIT timer is added after the
 * handover.
 */
switch (sk->sk_state) {
case TCP_SYN_RECV: /* initial state of a newly created connection while the handshake completes */
    if (!acceptable)
        return 1;

    if (!tp->srtt_us)
        tcp_synack_rtt_meas(sk, req);

    /* We get here via tcp_child_process() in tcp_v4_do_rcv(); before
     * tcp_child_process() a new struct sock was created by tcp_check_req().
     * Once we leave TCP_SYN_RECV, we no longer need req, so release it.
     */
    if (req) {
        tp->total_retrans = req->num_retrans;
        reqsk_fastopen_remove(sk, req, false); /* release the fastopen req */
    } else {
        /* Make sure socket is routed, for correct metrics. */
        icsk->icsk_af_ops->rebuild_header(sk);
        tcp_init_congestion_control(sk);

        tcp_mtup_init(sk);
        tp->copied_seq = tp->rcv_nxt;
        tcp_init_buffer_space(sk);
    }
    smp_mb();
    tcp_set_state(sk, TCP_ESTABLISHED); /* TCP_SYN_RECV -> TCP_ESTABLISHED */
    sk->sk_state_change(sk); /* sock_def_wakeup, wakes up epoll */

    /*
     * In sock_init_data() we have:
     *   sk->sk_state_change = sock_def_wakeup;
     *   sk->sk_data_ready   = sock_def_readable;
     *   sk->sk_write_space  = sock_def_write_space;
     *   sk->sk_error_report = sock_def_error_report;
     *   sk->sk_destruct     = sock_def_destruct;
     *
     * sk_state_change() -> sock_def_wakeup() -> ep_poll_callback() adds the
     * socket to epoll's ready list and wakes up the blocked epoll; epoll then
     * calls ep_send_events -> ep_scan_ready_list -> ep_send_events_proc ->
     * ep_item_poll -> tcp_poll.
     *
     * Set the 'child' transport control block to the ESTABLISHED state.
     *
     * Note, that this wakeup is only for marginal crossed SYN case.
     * Passively open sockets are not waked up, because
     * sk->sk_sleep == NULL and sk->sk_socket == NULL.
     *
     * Signal the processes that will send data through this socket that it
     * is now ready for sending.
     */

    if (sk->sk_socket)
        sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT);
    /*
     * Initialize the fields of the transport control block; if the timestamp
     * option is present and the smoothed RTT is zero, the retransmission
     * timeout has to be computed, etc.
     */
    tp->snd_una = TCP_SKB_CB(skb)->ack_seq;
    tp->snd_wnd = ntohs(th->window) << tp->rx_opt.snd_wscale;
    tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);

    if (tp->rx_opt.tstamp_ok)
        tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;

    if (req) {
        /* Re-arm the timer because data may have been sent out.
         * This is similar to the regular data transmission case
         * when new data has just been ack'ed.
         *
         * (TFO) - we could try to be more aggressive and
         * retransmitting any data sooner based on when they
         * are sent out.
         */
        tcp_rearm_rto(sk);
    } else
        tcp_init_metrics(sk);

    /* Route setup for this socket, congestion-control initialization and the
     * path-MTU related fields were handled in the else-branch above.
     */
    tcp_update_pacing_rate(sk);
    /* Update the time of the most recently sent data packet. */
    /* Prevent spurious tcp_cwnd_restart() on first data packet */
    tp->lsndtime = tcp_time_stamp;

    tcp_initialize_rcv_mss(sk);
    /* Compute the header-prediction flags. */
    tcp_fast_path_on(tp);
    break;

case TCP_FIN_WAIT1: {
    struct dst_entry *dst;
    int tmo;

    /* If we enter the TCP_FIN_WAIT1 state and we are a
     * Fast Open socket and this is the first acceptable
     * ACK we have received, this would have acknowledged
     * our SYNACK so stop the SYNACK timer.
     */
    if (req) {
        /* Return RST if ack_seq is invalid.
         * Note that RFC793 only says to generate a
         * DUPACK for it but for TCP Fast Open it seems
         * better to treat this case like TCP_SYN_RECV
         * above.
         */
        if (!acceptable)
            return 1;
        /* We no longer need the request sock. */
        reqsk_fastopen_remove(sk, req, false);
        tcp_rearm_rto(sk);
    }
    /* Outgoing data not yet fully acknowledged */
    if (tp->snd_una != tp->write_seq)
        break;

    tcp_set_state(sk, TCP_FIN_WAIT2); /* enter FIN_WAIT_2 */
    sk->sk_shutdown |= SEND_SHUTDOWN; /* shut down the send side */

    dst = __sk_dst_get(sk);
    if (dst) /* confirm the cached route */
        dst_confirm(dst);

    if (!sock_flag(sk, SOCK_DEAD)) {
        /* Wake up lingering close() */
        sk->sk_state_change(sk); /* socket is not DEAD: the state changed, wake up the waiters */
        break;
    }

    if (tp->linger2 < 0 || /* linger2 < 0: no need to wait in FIN_WAIT_2 */
        (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq && /* a segment (data or FIN) beyond the expected sequence */
         after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt))) {
        tcp_done(sk); /* close the connection */
        NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
        return 1;
    }

    tmo = tcp_fin_time(sk); /* FIN_WAIT_2 wait time */
    if (tmo > TCP_TIMEWAIT_LEN) { /* > TCP_TIMEWAIT_LEN: arm the FIN_WAIT_2 timer */
        inet_csk_reset_keepalive_timer(sk, tmo - TCP_TIMEWAIT_LEN);
    } else if (th->fin || sock_owned_by_user(sk)) {
        /* Bad case. We could lose such FIN otherwise.
         * It is not a big problem, but it looks confusing
         * and not so rare event. We still can lose it now,
         * if it spins in bh_lock_sock(), but it is really
         * marginal case.
         */
        /* A FIN is present, or the socket is locked by a user process:
         * arm the FIN_WAIT_2 timer. */
        inet_csk_reset_keepalive_timer(sk, tmo);
    } else { /* remaining wait time < TCP_TIMEWAIT_LEN: hand over to TIME_WAIT handling */
        tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
        goto discard;
    }
    break;
}

case TCP_CLOSING:
    if (tp->snd_una == tp->write_seq) {
        tcp_time_wait(sk, TCP_TIME_WAIT, 0);
        goto discard;
    }
    break;

case TCP_LAST_ACK:
    if (tp->snd_una == tp->write_seq) {
        tcp_update_metrics(sk);
        tcp_done(sk);
        goto discard;
    }
    break;
}

/* step 6: check the URG bit */
tcp_urg(sk, skb, th);

/*
 * Leaving FIN_WAIT_2 is driven by one of the following trigger points:
 *  (1) a data segment received while the FIN_WAIT_2 timer has not yet expired;
 *  (2) the FIN_WAIT_2 timer expiring;
 *  (3) a data segment received while the TIME_WAIT timer has not yet expired;
 *  (4) the TIME_WAIT timer expiring.
 */
/* step 7: process the segment text */
switch (sk->sk_state) {
case TCP_CLOSE_WAIT:
case TCP_CLOSING:
case TCP_LAST_ACK:
    if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt))
        break;
case TCP_FIN_WAIT1:
case TCP_FIN_WAIT2: /* a data segment arrived before the FIN_WAIT_2 timer expired; if it carries a FIN we go straight to TIME_WAIT */
    /* RFC 793 says to queue data in these states,
     * RFC 1122 says we MUST send a reset.
     * BSD 4.4 also does reset.
     */
    if (sk->sk_shutdown & RCV_SHUTDOWN) {
        if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
            after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt)) {
            NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONDATA);
            tcp_reset(sk);
            return 1;
        }
    }
    /* Fall through */
case TCP_ESTABLISHED:
    tcp_data_queue(sk, skb); /* queue any payload, e.g. when the client uses TCP_DEFER_ACCEPT */
    queued = 1;
    break;
}

/* tcp_data could move socket to TIME-WAIT */
if (sk->sk_state != TCP_CLOSE) {
    tcp_data_snd_check(sk); /* give pending data a chance to go out: tcp_push_pending_frames */
    tcp_ack_snd_check(sk);  /* check whether a delayed ACK should be sent now */
}

if (!queued) {  

discard:
tcp_drop(sk, skb);
}
return 0;
}
EXPORT_SYMBOL(tcp_rcv_state_process);

References:
https://blog.csdn.net/zhangskd/article/details/47380761
http://www.cnhalo.net/2016/06/13/linux-tcp-synack-rcv/

tcp_rcv_synsent_state_process() handles the SYN_SENT state, and distinguishes two scenarios.
(1) A SYN+ACK is received
In the common case the server's SYN+ACK arrives, and it is handled as follows:
check that ack_seq is valid; if the timestamp option is in use, check that the echoed timestamp is valid; check that the TCP flag bits are valid. If the SYN+ACK is legal, update the various fields of the sock.

Set the connection state to TCP_ESTABLISHED and wake up the process that called connect(). Decide whether to send the ACK immediately or to delay it.

(2) A SYN is received
This end has already sent a SYN and now receives a SYN as well: both sides are initiating the connection towards each other at the same time.
Handling: set the connection state to SYN_RECV, update the sock, build and send a SYN+ACK. The peer will then also reply with a SYN+ACK, and the subsequent processing is similar to a server receiving the handshake ACK.

When tcp_rcv_synsent_state_process() returns a value greater than 0, the calling function sends an RST in response.

Q: In which cases does this function return a value greater than 0?

A: When an ACK segment is received whose ack_seq is out of range, or whose echoed timestamp is invalid.
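
As a standalone illustration of that ack_seq check (RFC 793: in SYN-SENT the ACK is acceptable only when ISS < SEG.ACK <= SND.NXT), here is a small sketch using 32-bit sequence arithmetic. The helper names and the sample numbers are made up for this example and are not kernel code:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* seq_after(a, b): true if sequence number a is later than b, modulo 2^32 */
static bool seq_after(uint32_t a, uint32_t b)
{
    return (int32_t)(a - b) > 0;
}

/* Mirrors the SYN_SENT check: reject if SEG.ACK <= ISS or SEG.ACK > SND.NXT.
 * snd_una is initialized to the ISS when the SYN is sent.
 */
static bool ack_seq_acceptable(uint32_t ack_seq, uint32_t snd_una, uint32_t snd_nxt)
{
    return seq_after(ack_seq, snd_una) && !seq_after(ack_seq, snd_nxt);
}

int main(void)
{
    uint32_t iss = 1000;     /* made-up initial send sequence number */
    uint32_t snd_nxt = 1001; /* the SYN consumes one sequence number */

    printf("%d\n", ack_seq_acceptable(1001, iss, snd_nxt)); /* 1: valid SYN+ACK */
    printf("%d\n", ack_seq_acceptable(1000, iss, snd_nxt)); /* 0: acks the ISS itself */
    printf("%d\n", ack_seq_acceptable(1002, iss, snd_nxt)); /* 0: acks unsent data -> RST */
    return 0;
}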

Analysis: tcp_rcv_synsent_state_process

A sock in the TCP_SYN_SENT state is handled by tcp_rcv_synsent_state_process(), which:

  • parses the TCP options to learn what the server supports, e.g. SACK, TFO, wscale, MSS, timestamps
  • if the segment carries an ACK, calls tcp_ack(); with Fast Open this may acknowledge data sent together with the SYN
  • calls tcp_finish_connect(): TCP_SYN_SENT -> TCP_ESTABLISHED
  • saves the Fast Open cookie if one is present
  • decides between an immediate ACK and a delayed ACK
  • if the segment carries no ACK, only a SYN, this is a simultaneous connect: TCP_SYN_SENT -> TCP_SYN_RECV, and a SYN+ACK is sent

static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
                                         const struct tcphdr *th)
{
struct inet_connection_sock *icsk = inet_csk(sk); /* the client's sk */
struct tcp_sock *tp = tcp_sk(sk);
struct tcp_fastopen_cookie foc = { .len = -1 };
int saved_clamp = tp->rx_opt.mss_clamp;

tcp_parse_options(skb, &tp->rx_opt, 0, &foc); /* parse the TCP options, possibly including a fastopen cookie */
if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
    tp->rx_opt.rcv_tsecr -= tp->tsoffset; /* timestamp correction in repair mode */

if (th->ack) {
    /* rfc793:
     * "If the state is SYN-SENT then
     *    first check the ACK bit
     *      If the ACK bit is set
     *        If SEG.ACK =< ISS, or SEG.ACK > SND.NXT, send
     *        a reset (unless the RST bit is set, if so drop
     *        the segment and return)"
     */
    if (!after(TCP_SKB_CB(skb)->ack_seq, tp->snd_una) || /* snd_una was set to the SYN's sequence number; the returned ack is syn+1, or more with fastopen */
        after(TCP_SKB_CB(skb)->ack_seq, tp->snd_nxt))    /* the ack covers data that has not been sent yet */
        goto reset_and_undo;

    if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
        !between(tp->rx_opt.rcv_tsecr, tp->retrans_stamp, /* retrans_stamp is recorded when the SYN is sent; the echoed timestamp must fall in this range */
                 tcp_time_stamp)) {
        NET_INC_STATS(sock_net(sk),
                      LINUX_MIB_PAWSACTIVEREJECTED);
        goto reset_and_undo;
    }
    /* Now ACK is acceptable.
     *
     * "If the RST bit is set
     *    If the ACK was acceptable then signal the user "error:
     *    connection reset", drop the segment, enter CLOSED state,
     *    delete TCB, and return."
     */
    if (th->rst) {
        tcp_reset(sk); /* enter TCP_CLOSE */
        goto discard;  /* drop the packet */
    }

    /* rfc793:
     * "fifth, if neither of the SYN or RST bits is set then
     *  drop the segment and return."
     *
     * See note below!
     *                                        --ANK(990513)
     */
    if (!th->syn) /* neither RST nor SYN is set: drop and return */
        goto discard_and_undo;
    /* rfc793:
     * "If the SYN bit is on ...
     *  are acceptable then ...
     *  (our SYN has been ACKed), change the connection
     *  state to ESTABLISHED..."
     */
    tcp_ecn_rcv_synack(tp, th);

    tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
    tcp_ack(sk, skb, FLAG_SLOWPATH); /* process the ACK; with fastopen this may acknowledge earlier data */

    /* Ok.. it's good. Set up sequence numbers and
     * move to established.
     */
    tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
    tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1;

    /* RFC1323: The window in SYN & SYN/ACK segments is
     * never scaled.
     */
    tp->snd_wnd = ntohs(th->window); /* take the advertised window */
    if (!tp->rx_opt.wscale_ok) { /* the peer does not support wscale */
        tp->rx_opt.snd_wscale = tp->rx_opt.rcv_wscale = 0;
        tp->window_clamp = min(tp->window_clamp, 65535U); /* our own advertised window is then also capped to an unscaled value */
    }

    if (tp->rx_opt.saw_tstamp) { /* the SYN carried the timestamp option */
        tp->rx_opt.tstamp_ok = 1;
        tp->tcp_header_len = /* the TCP header grows by the timestamp option */
            sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
        tp->advmss -= TCPOLEN_TSTAMP_ALIGNED; /* the MSS shrinks by the timestamp option */
        tcp_store_ts_recent(tp); /* record the timestamp to be echoed */
    } else {
        tp->tcp_header_len = sizeof(struct tcphdr);
    }

    if (tcp_is_sack(tp) && sysctl_tcp_fack) /* the server supports SACK and the system enables FACK: turn on FACK */
        tcp_enable_fack(tp);

    tcp_mtup_init(sk); /* the peer's MSS option is known now, so the MTU probing range can be initialized */
    tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); /* update the probing range and mss from the PMTU */
    tcp_initialize_rcv_mss(sk); /* update our estimate of the peer's MSS; it will not exceed TCP_MSS_DEFAULT (536) */

    /* Remember, tcp_poll() does not lock socket!
     * Change state from SYN-SENT only after copied_seq
     * is initialized. */
    tp->copied_seq = tp->rcv_nxt; /* the next sequence number user space will read */

    smp_mb();

    /* tcp_finish_connect() moves the client into the connected state
     * (TCP_ESTABLISHED) and does the related initialization; data can be
     * sent from now on. */
    tcp_finish_connect(sk, skb); /* TCP_SYN_SENT -> TCP_ESTABLISHED */

    if ((tp->syn_fastopen || tp->syn_data) &&
        tcp_rcv_fastopen_synack(sk, skb, &foc)) /* fastopen handling, save the cookie */
        return -1; /* some data was left unacknowledged and has been retransmitted */

    if (sk->sk_write_pending ||                      /* there is data waiting to be written */
        icsk->icsk_accept_queue.rskq_defer_accept || /* the client set TCP_DEFER_ACCEPT: hold the ACK until there is data to send */
        icsk->icsk_ack.pingpong) {                   /* pingpong mode, quick ACKs disabled */
        /* Delay the ACK and let it ride along with the data.
         *
         * Save one ACK. Data will be ready after
         * several ticks, if write_pending is set.
         *
         * It may be deleted, but with this feature tcpdumps
         * look so _wonderfully_ clever, that I was not able
         * to stand against the temptation 8)     --ANK
         */
        inet_csk_schedule_ack(sk);                /* mark that an ACK has been deferred */
        icsk->icsk_ack.lrcvtime = tcp_time_stamp; /* record the receive time */
        tcp_enter_quickack_mode(sk);              /* enter quick-ack mode to speed up slow start */
        inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, /* arm the delayed-ACK timer */
                                  TCP_DELACK_MAX, TCP_RTO_MAX);

discard:
        tcp_drop(sk, skb);
        return 0;
    } else {
        tcp_send_ack(sk); /* no need to wait: send the ACK immediately */
    }
    return -1;
}
/* No ACK in the segment */
/* no ACK, but an RST: ignore this packet */
if (th->rst) {
    /* rfc793:
     * "If the RST bit is set
     *
     *      Otherwise (no ACK) drop the segment and return."
     */
    goto discard_and_undo;
}

/* PAWS check. */
if (tp->rx_opt.ts_recent_stamp && tp->rx_opt.saw_tstamp &&
    tcp_paws_reject(&tp->rx_opt, 0)) /* PAWS timestamp check */
    goto discard_and_undo;

if (th->syn) { /* a SYN arrived: simultaneous open, i.e. both sides called connect() */
    /* We see SYN without ACK. It is attempt of
     * simultaneous connect with crossed SYNs.
     * Particularly, it can be connect to self.
     */
    if (tp->rx_opt.saw_tstamp) {
        tp->rx_opt.tstamp_ok = 1;
        tcp_store_ts_recent(tp); /* record the peer's timestamp, to be echoed in the next segment */
        tp->tcp_header_len =
            sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
    } else {
        tp->tcp_header_len = sizeof(struct tcphdr);
    }

    tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1; /* next sequence number expected in the receive window */
    tp->copied_seq = tp->rcv_nxt;
    tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1; /* left edge of the receive window */

    /* RFC1323: The window in SYN & SYN/ACK segments is
     * never scaled.
     */
    tp->snd_wnd = ntohs(th->window);    /* the peer's receive window; no window scaling during the handshake */
    tp->snd_wl1 = TCP_SKB_CB(skb)->seq; /* sequence number of the segment that last updated the send window */
    tp->max_window = tp->snd_wnd;       /* largest window the peer has advertised so far */

    tcp_ecn_rcv_syn(tp, th);

    tcp_mtup_init(sk); /* MTU probing initialization, mss update */
    tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
    tcp_initialize_rcv_mss(sk); /* initialize the estimate of the peer's effective send MSS */

    tcp_send_synack(sk); /* send the SYN+ACK */
#if 0
    /* Note, we could accept data and URG from this segment.
     * There are no obstacles to make this (except that we must
     * either change tcp_recvmsg() to prevent it from returning data
     * before 3WHS completes per RFC793, or employ TCP Fast Open).
     *
     * However, if we ignore data in ACKless segments sometimes,
     * we have no reasons to accept it sometimes.
     * Also, seems the code doing it in step6 of tcp_rcv_state_process
     * is not flawless. So, discard packet for sanity.
     * Uncomment this return to process the data.
     */
    return -1;
#else
    goto discard;
#endif
}
/* "fifth, if neither of the SYN or RST bits is set then
* drop the segment and return."
*/
discard_and_undo:
tcp_clear_options(&tp->rx_opt);
tp->rx_opt.mss_clamp = saved_clamp;
goto discard;
reset_and_undo:
tcp_clear_options(&tp->rx_opt);
tp->rx_opt.mss_clamp = saved_clamp;
return 1;
}

In a simultaneous open, a socket in the SYN_SENT state that receives a SYN moves to SYN_RECV and then sends a SYN+ACK. Once it later receives a valid SYN+ACK, the connection establishment completes (the self-connect sketch below shows the simplest way to trigger this).
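
The kernel comment above notes that a crossed-SYN connect "can be connect to self". A minimal sketch of that case, assuming port 42000 on 127.0.0.1 is free: on Linux, binding a socket and connecting it to its own address typically produces a connection to itself via exactly this path.

#include <arpa/inet.h>
#include <netinet/in.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

/* Bind a socket to a local port and connect it to that same port: the SYN we
 * send is received by ourselves while we are still in SYN_SENT, which
 * exercises the th->syn branch above (SYN_SENT -> SYN_RECV -> ESTABLISHED).
 * No listener is involved. Port 42000 is arbitrary; pick any free port.
 */
int main(void)
{
    struct sockaddr_in addr;
    char buf[16];
    int fd = socket(AF_INET, SOCK_STREAM, 0);

    memset(&addr, 0, sizeof(addr));
    addr.sin_family = AF_INET;
    addr.sin_port = htons(42000);
    inet_pton(AF_INET, "127.0.0.1", &addr.sin_addr);

    if (bind(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0 ||
        connect(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
        perror("self-connect");
        return 1;
    }

    /* The socket is now connected to itself: what we write, we read back. */
    write(fd, "hello", 5);
    if (read(fd, buf, sizeof(buf)) == 5)
        printf("self-connection established, echoed: %.5s\n", buf);

    close(fd);
    return 0;
}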

/*
tcp_finish_connect() completes the connection establishment. It mainly:

  1. moves the connection state from SYN_SENT to ESTABLISHED;
  2. initializes TCP-related variables from the cached route;
  3. picks up the default congestion-control algorithm;
  4. adjusts the send and receive buffer sizes;
  5. if the SO_KEEPALIVE option is used, arms the keepalive timer;
  6. wakes up the processes waiting on this socket's wait queue (i.e. the caller of connect());
  7. if asynchronous notification is used, sends SIGIO to tell the processes on the async queue that the socket is writable.

*/
void tcp_finish_connect(struct sock *sk, struct sk_buff *skb)
{
struct tcp_sock *tp = tcp_sk(sk);
struct inet_connection_sock *icsk = inet_csk(sk);

    tcp_set_state(sk, TCP_ESTABLISHED); /* the connection is now established */

    if (skb) { /* set the receive route cache */
        icsk->icsk_af_ops->sk_rx_dst_set(sk, skb); /* inet_sk_rx_dst_set */
        security_inet_conn_established(sk, skb);
    }

    /* Make sure socket is routed, for correct metrics.  */
    icsk->icsk_af_ops->rebuild_header(sk); /* check or rebuild the route */

    tcp_init_metrics(sk); /* create/initialize the tcp metrics */

    tcp_init_congestion_control(sk); /* call the congestion-control init hook */

    /* Prevent spurious tcp_cwnd_restart() on first data
     * packet.
     */
    tp->lsndtime = tcp_time_stamp; /* record the time of the last data transmission */

    tcp_init_buffer_space(sk); /* initialize the buffer configuration from what the peer told us */

    if (sock_flag(sk, SOCK_KEEPOPEN)) /* keepalive enabled: arm the keepalive timer */
        inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tp));

    if (!tp->rx_opt.snd_wscale) /* the peer did not enable window scaling: enable the fast path (one of the header-prediction conditions) */
        __tcp_fast_path_on(tp, tp->snd_wnd);
    else
        tp->pred_flags = 0;

    if (!sock_flag(sk, SOCK_DEAD)) {
        sk->sk_state_change(sk); /* points to sock_def_wakeup: wake up the process that called connect() */
        sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT); /* if async notification is used, send SIGIO: the socket is writable */
    }

}
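
Regarding item 5 of the list above: SO_KEEPALIVE is requested from user space. A minimal sketch (the idle/interval/count values are arbitrary examples, not defaults) of how an application could enable it before connect(), so that tcp_finish_connect() finds SOCK_KEEPOPEN set and arms the keepalive timer:

#include <netinet/in.h>
#include <netinet/tcp.h>
#include <sys/socket.h>

/* Enable keepalive on a TCP socket so that tcp_finish_connect() arms the
 * keepalive timer once the connection is established. The probe timings are
 * example values, not system defaults.
 */
static int enable_keepalive(int fd)
{
    int on = 1, idle = 60, intvl = 10, cnt = 5;

    if (setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, &on, sizeof(on)) < 0)
        return -1;
    /* Linux-specific knobs for the probe schedule (seconds / probe count). */
    setsockopt(fd, IPPROTO_TCP, TCP_KEEPIDLE, &idle, sizeof(idle));
    setsockopt(fd, IPPROTO_TCP, TCP_KEEPINTVL, &intvl, sizeof(intvl));
    setsockopt(fd, IPPROTO_TCP, TCP_KEEPCNT, &cnt, sizeof(cnt));
    return 0;
}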