TCP的发送系列 — 发送缓存的管理(一)
(3) 发送缓存区上限sk->sk_sndbuf
sock发送缓冲区的上限sk->sk_sndbuf在tcp_init_sock()中初始化,初始值为tcp_wmem[1],
一般为16K。
/* Socket initialization (excerpt): sets the initial send/receive buffer
 * limits when the TCP socket is created.
 * sk_sndbuf starts at sysctl_tcp_wmem[1] (typically 16K) and sk_rcvbuf at
 * sysctl_tcp_rmem[1] (typically 85K); both may be enlarged later, when the
 * connection enters the ESTABLISHED state (see tcp_init_buffer_space()).
 */
void tcp_init_sock(struct sock *sk)
{
	...
	sk->sk_sndbuf = sysctl_tcp_wmem[1];	/* 16K */
	sk->sk_rcvbuf = sysctl_tcp_rmem[1];	/* 85K */
	...
}
(4) wmem_default和wmem_max
/proc/sys/net/core/wmem_max和/proc/sys/net/core/wmem_default,
默认值为256个负荷为256字节的数据段的总内存消耗。
对于TCP而言,wmem_default会被tcp_wmem[1]给覆盖掉,而wmem_max作为一个上限,
限制着用户使用SO_SNDBUF时可设置的发送缓存的大小。
[java] #define _SK_MEM_PACKETS 256 #define _SK_MEM_OVERHEAD SKB_TRUESIZE(256) #define SK_WMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS) __u32 sysctl_wmem_max __read_mostly = SK_WMEM_MAX; __u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX: int sock_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen) { ... switch (optname) { ... case SO_SNDBUF: /* 设置的值不能高于wmem_max */ val = min_t(u32, val, sysctl_wmem_max); set_sndbuf: /* 用户使用SO_SNDBUF的标志 */ sk->sk_userlocks |= SOCK_SNDBUF_LOCK; /* 发送缓存的上限,其实是两倍的用户设置值!*/ sk->sk_sndbuf = max_t(u32, val * 2, SOCK_MIN_SNDBUF); /* Wake up sending tasks if we upped the value. */ sk->sk_write_space(sk); /*有发送缓存可写事件 */ ... } ... }
sock发送缓存上限的动态调整
sk->sk_sndbuf为socket发送缓存的上限,发送队列的总大小不能超过这个值。
(1) 连接建立成功时
调用tcp_init_buffer_space()来调整发送缓存和接收缓存的大小。
/* Try to fixup all. It is made immediately after connection enters
 * established state.
 */
void tcp_init_buffer_space(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	int maxwin;

	/* If the user did not pin the buffer with SO_RCVBUF, adjust the
	 * receive buffer limit. Afterwards sk->sk_rcvbuf is usually larger
	 * than its initial value tcp_rmem[1].
	 */
	if (! (sk->sk_userlocks & SOCK_RCVBUF_LOCK))
		tcp_fixup_rcvbuf(sk);
	/* Likewise for SO_SNDBUF: adjust the send buffer limit. Afterwards
	 * sk->sk_sndbuf is usually larger than its initial value tcp_wmem[1].
	 */
	if (! (sk->sk_userlocks & SOCK_SNDBUF_LOCK))
		tcp_sndbuf_expand(sk);

	tp->rcvq_space.space = tp->rcv_wnd;	/* current receive-buffer usage, data only */
	tp->rcvq_space.time = tcp_time_stamp;
	tp->rcvq_space.seq = tp->copied_seq;	/* next copy to user space starts here */

	maxwin = tcp_full_space(sk);	/* 3/4 of the receive buffer limit */
	if (tp->window_clamp >= maxwin) {
		tp->window_clamp = maxwin;
		/* Max advertised window becomes 3/4 of maxwin, i.e. 3/4 of 3/4
		 * of the receive buffer limit (tcp_app_win defaults to 31,
		 * so maxwin >> tcp_app_win reserves an application slice).
		 */
		if (sysctl_tcp_app_win && maxwin > 4 * tp->advmss)
			tp->window_clamp = max(maxwin - (maxwin >> sysctl_tcp_app_win), 4 * tp->advmss);
	}

	/* Force reservation of one segment. Keep at least one MSS of room. */
	if (sysctl_tcp_app_win && tp->window_clamp > 2 * tp->advmss && tp->window_clamp + tp->advmss > maxwin)
		tp->window_clamp = max(2 * tp->advmss, maxwin - tp->advmss);

	tp->rcv_ssthresh = min(tp->rcv_ssthresh, tp->window_clamp);
	tp->snd_cwnd_stamp = tcp_time_stamp;
}
a. 调整接收缓冲区的上限sk->sk_rcvbuf
调整之后的sk->sk_rcvbuf,一般为8倍初始接收窗口(即16倍初始拥塞窗口TCP_INIT_CWND)的数据段所耗费的内存。
[java] /* Tuning rcvbuf, when connection enters established state. */ static void tcp_fixup_rcvbuf(struct sock *sk) { u32 mss = tcp_sk(sk)->advmss; int rcvmem; /* 初始的rwnd一般为2倍的初始拥塞控制窗口,即20个MSS。 * 所以rcvmem是40个MSS段耗费的总内存大小,包括协议头、sk_buff和 * skb_shared_info结构体。 */ rcvmem = 2 * SKB_TRUESIZE(mss + MAX_TCP_HEADER) * tcp_default_init_rwnd(mss); /* 如果让系统自动调节接收缓存的大小(默认是的) */ if (sysctl_tcp_moderate_rcvbuf) rcvmem <<= 2; /* 增加一倍 */ /* 如果rcvmem比tcp_rmem[1]大,那么更新接收缓冲区的上限。 * rcvmem一般会比tcp_rmem[1]大。 */ if (sk->sk_rcvbuf < rcvmem) sk->sk_rcvbuf = min(rcvmem, syscl_tcp_rmem[2]); }
初始的接收窗口大小,一般为2倍的初始拥塞窗口大小,即20个MSS。
[java] u32 tcp_default_init_rwnd(u32 mss) { /* Initial receive window should be twice of TCP_INIT_CWND to enable * proper sending of new unsent data during fast recovery (RFC 3517, * Section 4, NextSeg() rule (2)). Further place a limit when mss is larger * than 1460. */ u32 init_rwnd = TCP_INIT_CWND * 2; /* 设为初始拥塞窗口的2倍 */ if (mss > 1460) init_rwnd = max((1460 * init_rwnd) / mss, 2U); return init_rwnd; }
tcp_moderate_rcvbuf让系统自动调节接收缓存的大小,默认使用。
tcp_moderate_rcvbuf - BOOLEAN
If set, TCP performs receive buffer auto-tuning, attempting to automatically
size the buffer (no greater than tcp_rmem[2]) to match the size required by
the path for full throughput. Enabled by default.
b. 调整发送缓冲区的上限sk->sk_sndbuf
调整之后的sk->sk_sndbuf不少于2倍的拥塞控制窗口(tp->snd_cwnd)。
[java] /* Buffer size and advertised window tuning. * Tuning sk->sk_sndbuf, when connection enters established state. */ static void tcp_sndbuf_expand(struct sock *sk) { const struct tcp_sock *tp = tcp_sk(sk); int sndmem, per_mss; u32 nr_segs; /* Worst case is non GSO/TSO: each frame consumes one skb and * skb->head is kmalloced using power of two area of memory. */ /* 当不使用GSO/TSO时,一个TCP负荷为MSS的段所消耗的总内存 */ per_mss = max_t(u32, tp->rx_opt.mss_clamp, tp->mss_cache) + MAX_TCP_HEADER + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); per_mss = roundup_pow_of_two(per_mss) + SKB_DATA_ALIGN(sizeof(struct sk_buff)); /* 数据段的个数,取TCP_INIT_CWND、tp->snd_cwnd和 * tp->reordering + 1中的最大者。 */ nr_segs = max_t(u32, TCP_INIT_CWND, tp->snd_cwnd); nr_segs = max_t(u32, nr_segs, tp->reordering + 1); /* Fast Recovery (RFC 5681 3.2): * Cubic needs 1.7 factor, rounded to 2 to include extra cushion * (application might react slowly to POLLOUT) */ sndmem = 2 * nr_segs * per_mss; /* 2倍 */ /* 如果默认的发送缓冲区上限tcp_wmem[1]小于本次计算的值sndmem, * 那么更新sk->sk_sndbuf。由于默认值为16K,所以肯定会更新的:) */ if (sk->sk_sndbuf < sndmem) sk->sk_sndbuf = min(sndmem, sysctl_tcp_wmem[2]); }
评论暂时关闭