在新内核的实现中,ofo队列实际上是一棵红黑树。
在tcp_data_queue_ofo中根据序号,查找到合适位置,合并或者添加到rbtree中。
同时设置dsack和sack,准备ack给发送方。
//http://abcdxyzk.github.io/blog/2015/04/01/kernel-net-data-queue/
static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
{
struct tcp_sock *tp = tcp_sk(sk);
struct rb_node **p, *q, *parent;
struct sk_buff *skb1;
u32 seq, end_seq;
bool fragstolen;
/*如果收到乱序包 ,可能在传输过程中出现了 拥塞
所以检查ecn 标志如果是路由器 拥塞会设置这个标志 说明路径上存在拥塞,需要
给发送方 接收方进行拥塞处理 如果没有拥塞 需要尽快通知 发送方*/
tcp_ecn_check_ce(tp, skb);
if (unlikely(tcp\_try\_rmem\_schedule(sk, skb, skb->truesize))) {//接收缓存不够
NET\_INC\_STATS(sock\_net(sk), LINUX\_MIB\_TCPOFODROP);
tcp\_drop(sk, skb);//接收缓存不够 丢弃
return;
}
/\* Disable header prediction. \*/
tp->pred\_flags = 0;//收到乱序包,关闭快速路径
inet\_csk\_schedule\_ack(sk);//乱序包会快速ack
NET\_INC\_STATS(sock\_net(sk), LINUX\_MIB\_TCPOFOQUEUE);
seq = TCP\_SKB\_CB(skb)->seq;
end\_seq = TCP\_SKB\_CB(skb)->end\_seq;
SOCK\_DEBUG(sk, "out of order segment: rcv\_next %X seq %X - %X\\n",
tp->rcv\_nxt, seq, end\_seq);
p = &tp->out\_of\_order\_queue.rb\_node;
if (RB\_EMPTY\_ROOT(&tp->out\_of\_order\_queue)) {//ofo队列中为空,简单插入新的sack
/\* Initial out of order segment, build 1 SACK. \*/
if (tcp\_is\_sack(tp)) {
tp->rx\_opt.num\_sacks = 1;
tp->selective\_acks\[0\].start\_seq = seq;
tp->selective\_acks\[0\].end\_seq = end\_seq;
}
rb\_link\_node(&skb->rbnode, NULL, p);
rb\_insert\_color(&skb->rbnode, &tp->out\_of\_order\_queue);
tp->ooo\_last\_skb = skb;
goto end;
}
/\* In the typical case, we are adding an skb to the end of the list.
\* Use of ooo\_last\_skb avoids the O(Log(N)) rbtree lookup.
\*/
if (tcp\_try\_coalesce(sk, tp->ooo\_last\_skb, skb, &fragstolen)) {//对于普遍场景,先尝试合并skb到上一个乱序包
coalesce_done://合并完成
tcp_grow_window(sk, skb); //尝试增加窗口通告
kfree_skb_partial(skb, fragstolen);//skb已经被合并,可以释放
skb = NULL;
goto add_sack;
}
/* Can avoid an rbtree lookup if we are adding skb after ooo_last_skb */
if (!before(seq, TCP_SKB_CB(tp->ooo_last_skb)->end_seq)) {//如果序号比ooo_last_skb大,则可以直接添加,避免查找
parent = &tp->ooo_last_skb->rbnode;
p = &parent->rb_right;//添加到ooo_last_skb的右子树
goto insert;
}
/\* Find place to insert this segment. Handle overlaps on the way. \*/
parent = NULL;//需要查找这个ofo包的添加位置
while (\*p) {
parent = \*p;
skb1 = rb\_entry(parent, struct sk\_buff, rbnode);
if (before(seq, TCP\_SKB\_CB(skb1)->seq)) {
p = &parent->rb\_left;//比当前节点小,添加到左子树
continue;
}
if (before(seq, TCP\_SKB\_CB(skb1)->end\_seq)) {
if (!after(end\_seq, TCP\_SKB\_CB(skb1)->end\_seq)) {//序号所有部分都已经在当前节点
/\* All the bits are present. Drop. \*/
NET\_INC\_STATS(sock\_net(sk),
LINUX\_MIB\_TCPOFOMERGE);
\_\_kfree\_skb(skb);
skb = NULL;
tcp\_dsack\_set(sk, seq, end\_seq);
goto add\_sack;
}
if (after(seq, TCP\_SKB\_CB(skb1)->seq)) {//有部分重叠
/\* Partial overlap. \*/
tcp\_dsack\_set(sk, seq, TCP\_SKB\_CB(skb1)->end\_seq);//设置重叠部分dsack
} else {//skb1->seq = seq <= skb1->end\_seq < end\_seq
/\* skb's seq == skb1's seq and skb covers skb1.
\* Replace skb1 with skb.
\*///skb中包含了全部的skb1
//使用skb替换skb1
rb\_replace\_node(&skb1->rbnode, &skb->rbnode,
&tp->out\_of\_order\_queue);
//设置或合并现有dsack设置 //因为skb包含了全部skb1部分,则整个skb1都被重传了
tcp\_dsack\_extend(sk,
TCP\_SKB\_CB(skb1)->seq,
TCP\_SKB\_CB(skb1)->end\_seq);
NET\_INC\_STATS(sock\_net(sk),
LINUX\_MIB\_TCPOFOMERGE);
//释放skb1
\_\_kfree\_skb(skb1);
goto merge\_right;//还要继续查看skb1的右子数有没有需要合并的部分
}
} else if (tcp\_try\_coalesce(sk, skb1, skb, &fragstolen)) { // skb1->seq < skb1->end\_seq <= seq
goto coalesce\_done;//尝试合并
}
p = &parent->rb\_right;//比当前加点大,查找右子树
}
insert:
/* Insert segment into RB tree. *///找到合适位置后插入ofo队列
rb_link_node(&skb->rbnode, parent, p);
rb_insert_color(&skb->rbnode, &tp->out_of_order_queue);
merge_right:
/* Remove other segments covered by skb. */
while ((q = rb_next(&skb->rbnode)) != NULL) {//查看右子树中有没需要合并的节点
skb1 = rb_entry(q, struct sk_buff, rbnode);
if (!after(end\_seq, TCP\_SKB\_CB(skb1)->seq))//没有交集,不需要合并
break;
if (before(end\_seq, TCP\_SKB\_CB(skb1)->end\_seq)) {//有交集
tcp\_dsack\_extend(sk, TCP\_SKB\_CB(skb1)->seq,
end\_seq);//更新dsack
break;
}
//完全包含当前节点,删除该节点,并更新dsack
rb\_erase(&skb1->rbnode, &tp->out\_of\_order\_queue);
tcp\_dsack\_extend(sk, TCP\_SKB\_CB(skb1)->seq,
TCP\_SKB\_CB(skb1)->end\_seq);
NET\_INC\_STATS(sock\_net(sk), LINUX\_MIB\_TCPOFOMERGE);
tcp\_drop(sk, skb1);//可以删除skb1
}
/\* If there is no skb after us, we are the last\_skb ! \*/
if (!q)//没有下一个skb了,更新ooo\_last\_skb
tp->ooo\_last\_skb = skb;
add_sack:
if (tcp_is_sack(tp))
tcp_sack_new_ofo_skb(sk, seq, end_seq);
end:
if (skb) {//没有被合并//跟in-order包一样,调整窗口
tcp_grow_window(sk, skb);
skb_condense(skb);
skb_set_owner_r(skb, sk);
}
}
手机扫一扫
移动阅读更方便
你可能感兴趣的文章