/*
* This routine is called by the ICMP module when it gets some
* sort of error condition. If err < 0 then the socket should
* be closed and the error returned to the user. If err > 0
* it's just the icmp type << 8 | icmp code. After adjustment
* header points to the first 8 bytes of the tcp header. We need
* to find the appropriate port.
*
* The locking strategy used here is very "optimistic". When
* someone else accesses the socket the ICMP is just dropped
* and for some paths there is no check at all.
* A more general error queue to queue errors for later handling
* is probably better.
*
*/
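/*
 * The four kinds of ICMP error message -- destination unreachable, source
 * quench, time exceeded and parameter problem -- are all handled by the
 * same function, icmp_unreach(). Destination unreachable and source quench
 * need some special processing there because certain information has to be
 * extracted from them; the other types do not, and the transport layer's
 * error handler is called directly based on the information carried in the
 * error message.
 *
 * The data part of an ICMP error message consists of the IP header of the
 * original datagram plus the first 8 bytes of its payload (2-byte source
 * port + 2-byte destination port + 4-byte sequence number).
 */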
/**
 * tcp_v4_err - react to an ICMP error that quotes one of our TCP segments
 * @icmp_skb: ICMP error packet; icmp_skb->data is read as the IP header of
 *            the quoted datagram, followed (ihl * 4 bytes in) by the start
 *            of its TCP header
 * @info:     extra value supplied by the caller; only forwarded to
 *            do_pmtu_discovery() for ICMP_FRAG_NEEDED
 *
 * Looks up the socket matching the quoted addresses and ports, discards the
 * error if it fails the sanity checks below, converts the ICMP type/code
 * into an errno value and stores it in sk->sk_err (reported immediately via
 * sk->sk_error_report()) or in sk->sk_err_soft.
 *
 * The socket returned by inet_lookup() is released before returning, with
 * inet_twsk_put() for TIME_WAIT sockets and sock_put() otherwise.
 */
void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
{
	struct iphdr *iph = (struct iphdr *)icmp_skb->data;
	struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
	struct inet_connection_sock *icsk;
	struct tcp_sock *tp;
	struct inet_sock *inet;
	const int type = icmp_hdr(icmp_skb)->type;
	const int code = icmp_hdr(icmp_skb)->code;
	struct request_sock *req, **prev;
	struct sock *sk;
	struct sk_buff *skb;
	__u32 seq;
	__u32 remaining;
	int err;
	struct net *net = dev_net(icmp_skb->dev);

	/*
	 * The ICMP payload must hold the complete quoted IP header plus the
	 * first 8 bytes of the quoted datagram's data; otherwise give up.
	 */
	if (icmp_skb->len < (iph->ihl << 2) + 8) {
		ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
		return;
	}

	/*
	 * Find the socket that sent the quoted segment, using the addresses
	 * from the quoted IP header and the ports from the quoted TCP header.
	 * No match means the ICMP message is bogus or the socket is already
	 * gone; a TIME_WAIT socket is about to go away. Neither case needs
	 * any further handling.
	 */
	sk = inet_lookup(net, &tcp_hashinfo, iph->daddr, th->dest,
			 iph->saddr, th->source, inet_iif(icmp_skb));
	if (!sk) {
		ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
		return;
	}
	if (sk->sk_state == TCP_TIME_WAIT) {
		inet_twsk_put(inet_twsk(sk));
		return;
	}

	bh_lock_sock(sk);
	/* If too many ICMPs get dropped on busy
	 * servers this needs to be solved differently.
	 */
	/*
	 * The socket is currently owned by a user context (for example a
	 * process inside a send-type system call). Only the statistic is
	 * updated here; every path below that modifies socket state tests
	 * sock_owned_by_user() again on its own.
	 */
	if (sock_owned_by_user(sk))
		NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);

	if (sk->sk_state == TCP_CLOSE)
		goto out;

	/* Drop errors whose quoted TTL is below the socket's min_ttl. */
	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
		NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
		goto out;
	}

	/*
	 * Unless the socket is listening, the quoted sequence number has to
	 * pass the between(seq, snd_una, snd_nxt) test, i.e. refer to data
	 * that was sent and is not yet acknowledged. Anything else is an
	 * out-of-window ICMP and is ignored.
	 */
	icsk = inet_csk(sk);
	tp = tcp_sk(sk);
	seq = ntohl(th->seq);
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, tp->snd_una, tp->snd_nxt)) {
		NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	/* Translate the ICMP type/code into an errno value in 'err'. */
	switch (type) {
	case ICMP_SOURCE_QUENCH:
		/* Just silently ignore these. */
		goto out;
	case ICMP_PARAMETERPROB:
		err = EPROTO;
		break;
	/*
	 * Destination unreachable: validate the code first. "Fragmentation
	 * needed" triggers path-MTU discovery via do_pmtu_discovery(); every
	 * other code is mapped to an errno through icmp_err_convert[].
	 */
	case ICMP_DEST_UNREACH:
		if (code > NR_ICMP_UNREACH)
			goto out;

		if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
			if (!sock_owned_by_user(sk))
				do_pmtu_discovery(sk, iph, info);
			goto out;
		}

		err = icmp_err_convert[code].errno;
		/* check if icmp_skb allows revert of backoff
		 * (see draft-zimmermann-tcp-lcd) */
		if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
			break;
		/*
		 * Revert only if the error quotes the oldest unacknowledged
		 * segment and a backed-off retransmission is in progress.
		 */
		if (seq != tp->snd_una || !icsk->icsk_retransmits ||
		    !icsk->icsk_backoff)
			break;

		if (sock_owned_by_user(sk))
			break;

		/* Undo one backoff step and recompute the bounded RTO. */
		icsk->icsk_backoff--;
		icsk->icsk_rto = __tcp_set_rto(tp) << icsk->icsk_backoff;
		tcp_bound_rto(sk);

		skb = tcp_write_queue_head(sk);
		BUG_ON(!skb);

		/*
		 * Part of the new RTO still left to wait, measured against
		 * TCP_SKB_CB(skb)->when of the head of the write queue
		 * (presumably the time that segment was last sent -- confirm).
		 * min() keeps the subtraction from wrapping below zero.
		 */
		remaining = icsk->icsk_rto - min(icsk->icsk_rto,
				tcp_time_stamp - TCP_SKB_CB(skb)->when);

		if (remaining) {
			inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
						  remaining, TCP_RTO_MAX);
		} else {
			/* RTO revert clocked out retransmission.
			 * Will retransmit now */
			tcp_retransmit_timer(sk);
		}

		break;
	case ICMP_TIME_EXCEEDED:
		err = EHOSTUNREACH;
		break;
	default:
		goto out;
	}

	switch (sk->sk_state) {
	case TCP_LISTEN:
		/* Socket owned by a user context: do nothing further. */
		if (sock_owned_by_user(sk))
			goto out;

		/*
		 * Listening socket: search for the pending connection request
		 * matching the quoted destination port and the two addresses.
		 * Nothing to do if there is none.
		 */
		req = inet_csk_search_req(sk, &prev, th->dest,
					  iph->daddr, iph->saddr);
		if (!req)
			goto out;

		/* ICMPs are not backlogged, hence we cannot get
		   an established socket here.
		 */
		WARN_ON(req->sk);

		/*
		 * The quoted sequence number must equal the initial sequence
		 * number recorded for this request (snt_isn); otherwise the
		 * error is treated as out of window.
		 */
		if (seq != tcp_rsk(req)->snt_isn) {
			NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
			goto out;
		}

		/*
		 * Still in SYN_RECV, just remove it silently.
		 * There is no good way to pass the error to the newly
		 * created socket, and POSIX does not want network
		 * errors returned from accept().
		 */
		/* Drop the pending connection request from the listener. */
		inet_csk_reqsk_queue_drop(sk, req, prev);
		goto out;

	case TCP_SYN_SENT:
	case TCP_SYN_RECV:  /* Cannot happen.
			       It can f.e. if SYNs crossed.
			     */
		/*
		 * Not owned by a user context: record the error in sk_err,
		 * invoke the socket's error-report callback and finish the
		 * socket with tcp_done(). Otherwise only record it in
		 * sk_err_soft (the original annotation notes that the user
		 * can then fetch it with the SO_ERROR socket option).
		 */
		if (!sock_owned_by_user(sk)) {
			sk->sk_err = err;

			sk->sk_error_report(sk);

			tcp_done(sk);
		} else {
			sk->sk_err_soft = err;
		}
		goto out;
	}

	/* If we've already connected we will keep trying
	 * until we time out, or the user gives up.
	 *
	 * rfc1122 4.2.3.9 allows to consider as hard errors
	 * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
	 * but it is obsoleted by pmtu discovery).
	 *
	 * Note, that in modern internet, where routing is unreliable
	 * and in each dark corner broken firewalls sit, sending random
	 * errors ordered by their masters even this two messages finally lose
	 * their original sense (even Linux sends invalid PORT_UNREACHs)
	 *
	 * Now we are in compliance with RFCs.
	 *							--ANK (980905)
	 */

	/*
	 * Reaching this point means the socket is not in LISTEN, SYN_SENT or
	 * SYN_RECV (those cases all jump to 'out'). If it is not owned by a
	 * user context and inet->recverr is set, store the error in sk_err
	 * and report it right away; otherwise keep it in sk_err_soft only.
	 */
	inet = inet_sk(sk);
	if (!sock_owned_by_user(sk) && inet->recverr) {
		sk->sk_err = err;
		sk->sk_error_report(sk);
	} else	{ /* Only an error on timeout */
		sk->sk_err_soft = err;
	}

out:
	bh_unlock_sock(sk);
	sock_put(sk);
}
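/*
 * Web-page footer text carried over from the article this listing was
 * copied from; it is not part of the source code:
 *   "Scan with your phone"
 *   "Easier to read on mobile"
 *   "Articles you may be interested in"
 */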