主动关闭 time-wait 2msl 处理
阅读原文时间:2023年07月09日阅读:2

先上传,后面再整理。

/*
 * This routine is called by the ICMP module when it gets some
 * sort of error condition.  If err < 0 then the socket should
 * be closed and the error returned to the user.  If err > 0
 * it's just the icmp type << 8 | icmp code.  After adjustment
 * header points to the first 8 bytes of the tcp header.  We need
 * to find the appropriate port.
 *
 * The locking strategy used here is very "optimistic". When
 * someone else accesses the socket the ICMP is just dropped
 * and for some paths there is no check at all.
 * A more general error queue to queue errors for later handling
 * is probably better.
 *
 * Annotator's notes (translated from the original annotations):
 * The four ICMP error types - destination unreachable, source quench,
 * time exceeded and parameter problem - are all handled by the same
 * function, icmp_unreach().  Destination unreachable and source quench
 * need some special processing to extract information; the others do
 * not, and the transport-layer error handler is called directly based
 * on the information carried in the error message.
 *
 * The data part of an ICMP error message consists of the IP header of
 * the original datagram plus the first 8 bytes of its payload
 * (2-byte source port + 2-byte destination port + 4-byte sequence
 * number).
 *
 * @icmp_skb: the received ICMP error; its data starts at the embedded
 *            (original) IP header.
 * @info:     passed through to do_pmtu_discovery() for the
 *            ICMP_FRAG_NEEDED case; not otherwise used here.
 */
void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
{
	struct iphdr *iph = (struct iphdr *)icmp_skb->data;
	struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
	struct inet_connection_sock *icsk;
	struct tcp_sock *tp;
	struct inet_sock *inet;
	const int type = icmp_hdr(icmp_skb)->type;
	const int code = icmp_hdr(icmp_skb)->code;
	struct sock *sk;
	struct sk_buff *skb;
	__u32 seq;
	__u32 remaining;
	int err;
	struct net *net = dev_net(icmp_skb->dev);

	/*
	 * Check that the ICMP payload is long enough to contain the
	 * original IP header plus the first 8 bytes of the original
	 * IP payload; bail out if it is truncated.
	 */
	if (icmp_skb->len < (iph->ihl << 2) + 8) {
		ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
		return;
	}

	/*
	 * Look up the socket that sent the offending TCP segment, using
	 * the addresses from the embedded IP header and the ports from
	 * the embedded TCP header.  If the lookup fails, the ICMP message
	 * is bogus or the socket is already gone; if the socket is in
	 * TIME_WAIT it is about to go away.  Neither case needs further
	 * processing.
	 */
	sk = inet_lookup(net, &tcp_hashinfo, iph->daddr, th->dest,
			 iph->saddr, th->source, inet_iif(icmp_skb));
	if (!sk) {
		ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
		return;
	}
	if (sk->sk_state == TCP_TIME_WAIT) {
		inet_twsk_put(inet_twsk(sk));
		return;
	}

	bh_lock_sock(sk);
	/* If too many ICMPs get dropped on busy
	 * servers this needs to be solved differently.
	 *
	 * If the socket is currently owned by a user context (e.g. a
	 * process is inside send()), account for it in the SNMP counters.
	 */
	if (sock_owned_by_user(sk))
		NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);

	if (sk->sk_state == TCP_CLOSE)
		goto out;

	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
		NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
		goto out;
	}

	/*
	 * If the socket is not listening and the sequence number of the
	 * quoted segment is not within the sent-but-unacknowledged range,
	 * the ICMP message is bogus; ignore it.
	 */
	icsk = inet_csk(sk);
	tp = tcp_sk(sk);
	seq = ntohl(th->seq);
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, tp->snd_una, tp->snd_nxt)) {
		NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	switch (type) {
	case ICMP_SOURCE_QUENCH:
		/* Just silently ignore these. */
		goto out;
	case ICMP_PARAMETERPROB:
		err = EPROTO;
		break;
	/*
	 * Destination unreachable: validate the code first, then act on
	 * it.  "Fragmentation needed but DF set" triggers path MTU
	 * discovery via do_pmtu_discovery(); every other code is mapped
	 * to its errno value.
	 */
	case ICMP_DEST_UNREACH:
		if (code > NR_ICMP_UNREACH)
			goto out;

		if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
			if (!sock_owned_by_user(sk))
				do_pmtu_discovery(sk, iph, info);
			goto out;
		}

		err = icmp_err_convert[code].errno;
		/* check if icmp_skb allows revert of backoff
		 * (see draft-zimmermann-tcp-lcd) */
		if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
			break;
		if (seq != tp->snd_una || !icsk->icsk_retransmits ||
		    !icsk->icsk_backoff)
			break;

		if (sock_owned_by_user(sk))
			break;

		/* Undo one step of exponential backoff and recompute the RTO. */
		icsk->icsk_backoff--;
		inet_csk(sk)->icsk_rto = __tcp_set_rto(tp) <<
					 icsk->icsk_backoff;
		tcp_bound_rto(sk);

		skb = tcp_write_queue_head(sk);
		BUG_ON(!skb);

		/* Time left on the reverted RTO, clamped at zero. */
		remaining = icsk->icsk_rto - min(icsk->icsk_rto,
				tcp_time_stamp - TCP_SKB_CB(skb)->when);

		if (remaining) {
			inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
						  remaining, TCP_RTO_MAX);
		} else {
			/* RTO revert clocked out retransmission.
			 * Will retransmit now */
			tcp_retransmit_timer(sk);
		}

		break;
	case ICMP_TIME_EXCEEDED:
		err = EHOSTUNREACH;
		break;
	default:
		goto out;
	}

	switch (sk->sk_state) {
		struct request_sock *req, **prev;
	case TCP_LISTEN:
		/* If the socket is owned by a user context, do nothing more. */
		if (sock_owned_by_user(sk))
			goto out;

		/*
		 * The socket is listening, so search for the pending
		 * connection request by destination port and the
		 * destination/source addresses.  Nothing to do if none
		 * is found.
		 */
		req = inet_csk_search_req(sk, &prev, th->dest,
					  iph->daddr, iph->saddr);
		if (!req)
			goto out;

		/* ICMPs are not backlogged, hence we cannot get
		   an established socket here.
		 */
		WARN_ON(req->sk);

		/*
		 * If the sequence number of the quoted segment differs
		 * from the ISN we sent for this request, the ICMP is
		 * out of window; ignore it.
		 */
		if (seq != tcp_rsk(req)->snt_isn) {
			NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
			goto out;
		}

		/*
		 * Still in SYN_RECV, just remove it silently.
		 * There is no good way to pass the error to the newly
		 * created socket, and POSIX does not want network
		 * errors returned from accept().
		 *
		 * Drop and release the pending connection request.
		 */
		inet_csk_reqsk_queue_drop(sk, req, prev);
		goto out;

	case TCP_SYN_SENT:
	case TCP_SYN_RECV:  /* Cannot happen.
			       It can f.e. if SYNs crossed.
			     */
		/*
		 * If the socket is not owned by a user context, store the
		 * error in sk_err, invoke the socket's error-report
		 * callback and finish the socket via tcp_done().
		 * Otherwise store it in sk_err_soft; per the original
		 * annotation the user can then fetch it with SO_ERROR.
		 */
		if (!sock_owned_by_user(sk)) {
			sk->sk_err = err;

			sk->sk_error_report(sk);

			tcp_done(sk);
		} else {
			sk->sk_err_soft = err;
		}
		goto out;
	}

	/* If we've already connected we will keep trying
	 * until we time out, or the user gives up.
	 *
	 * rfc1122 4.2.3.9 allows to consider as hard errors
	 * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
	 * but it is obsoleted by pmtu discovery).
	 *
	 * Note, that in modern internet, where routing is unreliable
	 * and in each dark corner broken firewalls sit, sending random
	 * errors ordered by their masters even this two messages finally lose
	 * their original sense (even Linux sends invalid PORT_UNREACHs)
	 *
	 * Now we are in compliance with RFCs.
	 *							--ANK (980905)
	 */

	/*
	 * Reaching this point means the socket is not in LISTEN, SYN_SENT
	 * or SYN_RECV.  If it is not owned by a user context and the
	 * recverr flag is set, record the error in sk_err and report it;
	 * otherwise record it only as a soft error in sk_err_soft.
	 */
	inet = inet_sk(sk);
	if (!sock_owned_by_user(sk) && inet->recverr) {
		sk->sk_err = err;
		sk->sk_error_report(sk);
	} else	{ /* Only an error on timeout */
		sk->sk_err_soft = err;
	}

out:
	bh_unlock_sock(sk);
	sock_put(sk);
}

手机扫一扫

移动阅读更方便

阿里云服务器
腾讯云服务器
七牛云服务器