重传定时器
<h2>概述</h2>
<p>1、超时后重传的是发送队列的首包,即 tcp_write_queue_head(sk) 返回的 skb。</p>
<h2>分析</h2>
<p>设置定时器:</p>
<pre><code class="language-c">// 其中 inet_csk(sk)-&gt;icsk_rto 为超时时间
/* Arm the retransmission timer (ICSK_TIME_RETRANS): the delay is the
 * socket's current icsk_rto, and TCP_RTO_MAX is passed as the upper bound.
 */
inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
inet_csk(sk)-&gt;icsk_rto, TCP_RTO_MAX);
</code></pre>
<p>定义:</p>
<pre><code class="language-c">// file: include/net/inet_connection_sock.h
/*
* Reset the retransmission timer
*
* (Re)arms the per-socket timer selected by @what so that it expires
* @when jiffies from now; @when is first clamped to @max_when.
*
* @sk:       socket whose timer is armed
* @what:     timer kind (ICSK_TIME_RETRANS, ICSK_TIME_PROBE0,
*            ICSK_TIME_EARLY_RETRANS, ICSK_TIME_LOSS_PROBE or ICSK_TIME_DACK)
* @when:     delay, added to jiffies to form the expiry time
* @max_when: upper bound for @when
*
* Any other @what value arms nothing; it is only reported when
* INET_CSK_DEBUG is defined.
*/
static inline void inet_csk_reset_xmit_timer(struct sock *sk, const int what,
unsigned long when,
const unsigned long max_when)
{
struct inet_connection_sock *icsk = inet_csk(sk);
if (when &gt; max_when) { // clamp: the delay must not exceed max_when
#ifdef INET_CSK_DEBUG
pr_debug(&quot;reset_xmit_timer: sk=%p %d when=0x%lx, caller=%p\n&quot;,
sk, what, when, current_text_addr());
#endif
when = max_when;
}
/* These four kinds all share icsk_retransmit_timer; icsk_pending
 * records which of them is currently armed.
 */
if (what == ICSK_TIME_RETRANS || what == ICSK_TIME_PROBE0 ||
what == ICSK_TIME_EARLY_RETRANS || what == ICSK_TIME_LOSS_PROBE) {
icsk-&gt;icsk_pending = what;
icsk-&gt;icsk_timeout = jiffies + when;
sk_reset_timer(sk, &amp;icsk-&gt;icsk_retransmit_timer, icsk-&gt;icsk_timeout);
} else if (what == ICSK_TIME_DACK) {
/* ICSK_TIME_DACK uses a separate timer (icsk_delack_timer) and keeps
 * its own pending flag and expiry time in icsk_ack.
 */
icsk-&gt;icsk_ack.pending |= ICSK_ACK_TIMER;
icsk-&gt;icsk_ack.timeout = jiffies + when;
sk_reset_timer(sk, &amp;icsk-&gt;icsk_delack_timer, icsk-&gt;icsk_ack.timeout);
}
#ifdef INET_CSK_DEBUG
else {
/* Unknown timer kind: debug builds log a diagnostic message. */
pr_debug(&quot;%s&quot;, inet_csk_timer_bug_msg);
}
#endif
}
</code></pre>
<p>重置或删除定时器(tcp_rearm_rto:没有在途数据包时清除定时器,否则按 RTO 重新设置):</p>
<pre><code class="language-c">// file: net/ipv4/tcp_input.c
/* Restart timer after forward progress on connection.
* RFC2988 recommends to restart timer to now+rto.
*
* Three outcomes:
*  - tp fastopen_rsk is set: the timer is left untouched;
*  - no packets are outstanding (packets_out == 0): the retransmission
*    timer is cleared;
*  - otherwise the timer is re-armed as ICSK_TIME_RETRANS, bounded by
*    TCP_RTO_MAX.
*/
void tcp_rearm_rto(struct sock *sk)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
/* If the retrans timer is currently being used by Fast Open
* for SYN-ACK retrans purpose, stay put.
*/
if (tp-&gt;fastopen_rsk)
return;
if (!tp-&gt;packets_out) {
/* Nothing in flight, so there is nothing to retransmit. */
inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS);
} else {
/* Default delay: the full current RTO. */
u32 rto = inet_csk(sk)-&gt;icsk_rto;
/* Offset the time elapsed after installing regular RTO */
if (icsk-&gt;icsk_pending == ICSK_TIME_EARLY_RETRANS ||
icsk-&gt;icsk_pending == ICSK_TIME_LOSS_PROBE) {
/* NOTE(review): tcp_rto_delta() is not shown here; presumably it
 * returns the time left until the regular RTO would have fired.
 */
s32 delta = tcp_rto_delta(sk);
/* delta may not be positive if the socket is locked
* when the retrans timer fires and is rescheduled.
*/
/* Use at least 1 so the delay is never zero or negative. */
rto = max_t(int, delta, 1);
}
inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, rto,
TCP_RTO_MAX);
}
}
</code></pre>
<h2>定时器超时处理</h2>
<p>调用关系尚未梳理,这里直接采用参考书中的结论:</p>
<pre><code class="language-c">// file: net/ipv4/tcp_timer.c
/*
 * Timer callback for the write (retransmission) timer.
 *
 * @data: the owning socket, passed as an unsigned long and cast back to
 *        struct sock * below.
 *
 * Runs tcp_write_timer_handler() directly when sock_owned_by_user() is
 * false; otherwise marks the work as deferred in tsq_flags.
 */
static void tcp_write_timer(unsigned long data)
{
struct sock *sk = (struct sock *)data;
bh_lock_sock(sk);
if (!sock_owned_by_user(sk)) {
tcp_write_timer_handler(sk); // handle the expiry right away
} else {
/* delegate our work to tcp_release_cb() */
/* Take an extra reference only when the deferred bit was not
 * already set, i.e. once per pending deferral.
 */
if (!test_and_set_bit(TCP_WRITE_TIMER_DEFERRED, &amp;tcp_sk(sk)-&gt;tsq_flags))
sock_hold(sk);
}
bh_unlock_sock(sk);
/* NOTE(review): presumably drops the reference that was taken when the
 * timer was armed - confirm against sk_reset_timer().
 */
sock_put(sk);
}
/*
 * Handle an expiry of the shared retransmit timer for @sk.
 *
 * Does nothing (apart from sk_mem_reclaim()) when the socket is in the
 * CLOSE or LISTEN state or when no timer kind is pending. If the recorded
 * expiry time icsk_timeout is still in the future, the timer is simply
 * re-armed for that time. Otherwise the handler for the pending timer
 * kind is called.
 */
void tcp_write_timer_handler(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
int event;
if (((1 &lt;&lt; sk-&gt;sk_state) &amp; (TCPF_CLOSE | TCPF_LISTEN)) ||
!icsk-&gt;icsk_pending)
goto out;
/* Fired before the recorded deadline: re-arm for the real deadline. */
if (time_after(icsk-&gt;icsk_timeout, jiffies)) {
sk_reset_timer(sk, &amp;icsk-&gt;icsk_retransmit_timer, icsk-&gt;icsk_timeout);
goto out;
}
event = icsk-&gt;icsk_pending; // which timer kind is pending
/* Only the RETRANS and PROBE0 cases clear icsk_pending here; the other
 * two cases leave it as it is before calling their handler.
 */
switch (event) {
case ICSK_TIME_EARLY_RETRANS:
tcp_resume_early_retransmit(sk);
break;
case ICSK_TIME_LOSS_PROBE:
tcp_send_loss_probe(sk);
break;
case ICSK_TIME_RETRANS: // usually this is the kind that fires
icsk-&gt;icsk_pending = 0;
tcp_retransmit_timer(sk);
break;
case ICSK_TIME_PROBE0:
icsk-&gt;icsk_pending = 0;
tcp_probe_timer(sk);
break;
}
out:
/* Reached on every path, including the early exits above. */
sk_mem_reclaim(sk);
}
/*
* The TCP retransmit timer.
*
* Called when the ICSK_TIME_RETRANS timer of @sk has expired. In order:
*  1. With a Fast Open request socket pending, only the SYN-ACK timer
*     routine runs.
*  2. With no packets outstanding, nothing is done.
*  3. With a zero send window (socket not dead, not in SYN_SENT/SYN_RECV),
*     the head of the write queue is retransmitted; the connection is
*     aborted only if rcv_tstamp is more than TCP_RTO_MAX old.
*  4. Otherwise, unless tcp_write_timeout() reports true, the head of the
*     write queue is retransmitted and the timer is re-armed with a
*     backed-off (or, for thin streams, recomputed) RTO.
*/
void tcp_retransmit_timer(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
struct inet_connection_sock *icsk = inet_csk(sk);
if (tp-&gt;fastopen_rsk) {
WARN_ON_ONCE(sk-&gt;sk_state != TCP_SYN_RECV &amp;&amp;
sk-&gt;sk_state != TCP_FIN_WAIT1);
tcp_fastopen_synack_timer(sk);
/* Before we receive ACK to our SYN-ACK don't retransmit
* anything else (e.g., data or FIN segments).
*/
return;
}
if (!tp-&gt;packets_out)
goto out;
WARN_ON(tcp_write_queue_empty(sk)); // packets are outstanding, so the write queue is expected to be non-empty
tp-&gt;tlp_high_seq = 0;
if (!tp-&gt;snd_wnd &amp;&amp; !sock_flag(sk, SOCK_DEAD) &amp;&amp;
!((1 &lt;&lt; sk-&gt;sk_state) &amp; (TCPF_SYN_SENT | TCPF_SYN_RECV))) {
/* Receiver dastardly shrinks window. Our retransmits
* become zero probes, but we should not timeout this
* connection. If the socket is an orphan, time it out,
* we cannot allow such beasts to hang infinitely.
*/
struct inet_sock *inet = inet_sk(sk);
/* Log the peer address; the format depends on the address family. */
if (sk-&gt;sk_family == AF_INET) {
LIMIT_NETDEBUG(KERN_DEBUG pr_fmt(&quot;Peer %pI4:%u/%u unexpectedly shrunk window %u:%u (repaired)\n&quot;),
&amp;inet-&gt;inet_daddr,
ntohs(inet-&gt;inet_dport), inet-&gt;inet_num,
tp-&gt;snd_una, tp-&gt;snd_nxt);
}
#if IS_ENABLED(CONFIG_IPV6)
else if (sk-&gt;sk_family == AF_INET6) {
struct ipv6_pinfo *np = inet6_sk(sk);
LIMIT_NETDEBUG(KERN_DEBUG pr_fmt(&quot;Peer %pI6:%u/%u unexpectedly shrunk window %u:%u (repaired)\n&quot;),
&amp;np-&gt;daddr,
ntohs(inet-&gt;inet_dport), inet-&gt;inet_num,
tp-&gt;snd_una, tp-&gt;snd_nxt);
}
#endif
/* rcv_tstamp lags tcp_time_stamp by more than TCP_RTO_MAX:
 * give up on the connection via tcp_write_err().
 */
if (tcp_time_stamp - tp-&gt;rcv_tstamp &gt; TCP_RTO_MAX) {
tcp_write_err(sk);
goto out;
}
/* Retransmit the head of the write queue, then jump straight to the
 * timer re-arm; the backoff and retransmit counters are not
 * incremented on this path.
 */
tcp_enter_loss(sk, 0);
tcp_retransmit_skb(sk, tcp_write_queue_head(sk));
__sk_dst_reset(sk);
goto out_reset_timer;
}
if (tcp_write_timeout(sk)) // presumably true once the retransmission limit is exceeded; stop here
goto out;
/* First timeout of this episode: choose which MIB counter to bump
 * from the current congestion-control state.
 */
if (icsk-&gt;icsk_retransmits == 0) {
int mib_idx;
if (icsk-&gt;icsk_ca_state == TCP_CA_Recovery) {
if (tcp_is_sack(tp))
mib_idx = LINUX_MIB_TCPSACKRECOVERYFAIL;
else
mib_idx = LINUX_MIB_TCPRENORECOVERYFAIL;
} else if (icsk-&gt;icsk_ca_state == TCP_CA_Loss) {
mib_idx = LINUX_MIB_TCPLOSSFAILURES;
} else if ((icsk-&gt;icsk_ca_state == TCP_CA_Disorder) ||
tp-&gt;sacked_out) {
if (tcp_is_sack(tp))
mib_idx = LINUX_MIB_TCPSACKFAILURES;
else
mib_idx = LINUX_MIB_TCPRENOFAILURES;
} else {
mib_idx = LINUX_MIB_TCPTIMEOUTS;
}
NET_INC_STATS_BH(sock_net(sk), mib_idx);
}
tcp_enter_loss(sk, 0);
if (tcp_retransmit_skb(sk, tcp_write_queue_head(sk)) &gt; 0) { // retransmit the first skb of the write queue
/* Retransmission failed because of local congestion,
* do not backoff.
*/
if (!icsk-&gt;icsk_retransmits)
icsk-&gt;icsk_retransmits = 1;
/* Retry after the smaller of icsk_rto and
 * TCP_RESOURCE_PROBE_INTERVAL, leaving icsk_rto itself unchanged.
 */
inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
min(icsk-&gt;icsk_rto, TCP_RESOURCE_PROBE_INTERVAL),
TCP_RTO_MAX);
goto out;
}
/* Increase the timeout each time we retransmit. Note that
* we do not increase the rtt estimate. rto is initialized
* from rtt, but increases here. Jacobson (SIGCOMM 88) suggests
* that doubling rto each time is the least we can get away with.
* In KA9Q, Karn uses this for the first few times, and then
* goes to quadratic. netBSD doubles, but only goes up to *64,
* and clamps at 1 to 64 sec afterwards. Note that 120 sec is
* defined in the protocol as the maximum possible RTT. I guess
* we'll have to use something other than TCP to talk to the
* University of Mars.
*
* PAWS allows us longer timeouts and large windows, so once
* implemented ftp to mars will work nicely. We will have to fix
* the 120 second clamps though!
*/
icsk-&gt;icsk_backoff++;
icsk-&gt;icsk_retransmits++;
out_reset_timer:
/* If stream is thin, use linear timeouts. Since 'icsk_backoff' is
* used to reset timer, set to 0. Recalculate 'icsk_rto' as this
* might be increased if the stream oscillates between thin and thick,
* thus the old value might already be too high compared to the value
* set by 'tcp_set_rto' in tcp_input.c which resets the rto without
* backoff. Limit to TCP_THIN_LINEAR_RETRIES before initiating
* exponential backoff behaviour to avoid continue hammering
* linear-timeout retransmissions into a black hole
*/
if (sk-&gt;sk_state == TCP_ESTABLISHED &amp;&amp;
(tp-&gt;thin_lto || sysctl_tcp_thin_linear_timeouts) &amp;&amp;
tcp_stream_is_thin(tp) &amp;&amp;
icsk-&gt;icsk_retransmits &lt;= TCP_THIN_LINEAR_RETRIES) {
icsk-&gt;icsk_backoff = 0;
icsk-&gt;icsk_rto = min(__tcp_set_rto(tp), TCP_RTO_MAX);
} else {
/* Use normal (exponential) backoff */
icsk-&gt;icsk_rto = min(icsk-&gt;icsk_rto &lt;&lt; 1, TCP_RTO_MAX);
}
inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk-&gt;icsk_rto, TCP_RTO_MAX); // re-arm the timer before returning, using the RTO computed just above
/* NOTE(review): once retransmits_timed_out() reports the tcp_retries1 + 1
 * threshold as reached, the cached route is reset - presumably so that a
 * fresh route is looked up next time; confirm.
 */
if (retransmits_timed_out(sk, sysctl_tcp_retries1 + 1, 0, 0))
__sk_dst_reset(sk);
out:;
}</code></pre>