(3) 发送缓存区上限sk->sk_sndbuf

sock发送缓冲区的上限sk->sk_sndbuf在tcp_init_sock()中初始化,初始值为tcp_wmem[1],

一般为16K。

[c]
/* Initialize per-socket state when a TCP socket is created.
 * (Excerpt — "..." marks elided kernel code.)
 * Sets the initial send/receive buffer limits from the tcp_wmem/tcp_rmem
 * sysctls; these may be raised later by auto-tuning (tcp_init_buffer_space).
 */
void tcp_init_sock(struct sock *sk)
{
...
sk->sk_sndbuf = sysctl_tcp_wmem[1]; /* default send-buffer limit, typically 16K */
sk->sk_rcvbuf = sysctl_tcp_rmem[1]; /* default receive-buffer limit, typically ~85K */
...
}

(4) wmem_default和wmem_max

/proc/sys/net/core/wmem_max和/proc/sys/net/core/wmem_default,

默认值为 SK_WMEM_MAX,即 256 个负荷为 256 字节的数据段所消耗的总内存大小。

对于TCP而言,wmem_default会被tcp_wmem[1]给覆盖掉,而wmem_max作为一个上限,

限制着用户使用SO_SNDBUF时可设置的发送缓存的大小。

[c]
#define _SK_MEM_PACKETS 256
#define _SK_MEM_OVERHEAD SKB_TRUESIZE(256)
#define SK_WMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
__u32 sysctl_wmem_max __read_mostly = SK_WMEM_MAX;
__u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX:
int sock_setsockopt(struct socket *sock, int level, int optname, char __user *optval,
unsigned int optlen)
{
...
switch (optname) {
...
case SO_SNDBUF:
/* 设置的值不能高于wmem_max */
val = min_t(u32, val, sysctl_wmem_max);
set_sndbuf:
/* 用户使用SO_SNDBUF的标志 */
sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
/* 发送缓存的上限,其实是两倍的用户设置值!*/
sk->sk_sndbuf = max_t(u32, val * 2, SOCK_MIN_SNDBUF);
/* Wake up sending tasks if we upped the value. */
sk->sk_write_space(sk); /*有发送缓存可写事件 */
...
}
...
}

sock发送缓存上限的动态调整

sk->sk_sndbuf为socket发送缓存的上限,发送队列的总大小不能超过这个值。

(1) 连接建立成功时

调用tcp_init_buffer_space()来调整发送缓存和接收缓存的大小。

[c]
/* Try to fixup all. It is made immediately after connection enters
* established state.
*/
void tcp_init_buffer_space(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
int maxwin;
/* 如果用户没有使用SO_RCVBUF选项,就调整接收缓冲区的上限。
* 调整之后,一般sk->sk_rcvbuf会大于初始值tcp_rmem[1]。
*/
if (! (sk->sk_userlocks & SOCK_RCVBUF_LOCK))
tcp_fixup_rcvbuf(sk);
/* 如果用户没有使用SO_SNDBUF选项,就调整发送缓冲区的上限。
* 调整之后,一般sk->sk_sndbuf会大于初始值tcp_wmem[1]。
*/
if (! (sk->sk_userlocks & SOCK_SNDBUF_LOCK))
tcp_sndbuf_expand(sk);
tp->rcvq_space.space = tp->rcv_wnd; /* 当前接收缓存的大小,只包括数据 */
tp->rcvq_space.time = tcp_time_stamp;
tp->rcvq_space.seq = tp->copied_seq; /* 下次复制从这里开始 */
maxwin = tcp_full_space(sk); /* 接收缓存上限的3/4 */
if (tp->window_clamp >= maxwin) {
tp->window_clamp = maxwin;
/* 最大的通告窗口,变为接收缓存上限的3/4的3/4 */
if (sysctl_tcp_app_win && maxwin > 4 * tp->advmss)
tp->window_clamp = max(maxwin - (maxwin >> sysctl_tcp_app_win),
4 * tp->advmss);
}
/* Force reservation of one segment. 至少要预留一个MSS的空间 */
if (sysctl_tcp_app_win && tp->window_clamp > 2 * tp->advmss &&
tp->window_clamp + tp->advmss > maxwin)
tp->window_clamp = max(2 * tp->advmss, maxwin - tp->advmss);
tp->rcv_ssthresh = min(tp->rcv_ssthresh, tp->window_clamp);
tp->snd_cwnd_stamp = tcp_time_stamp;
}

a. 调整接收缓冲区的上限sk->sk_rcvbuf

调整之后的sk->sk_rcvbuf,一般为 2 × 20 × 4 = 160 个MSS段所耗的总内存,约为初始拥塞控制窗口(TCP_INIT_CWND = 10)所需内存的16倍(注意代码中的 rcvmem <<= 2 是乘4而非乘2)。

[c]
/* Tuning rcvbuf, when connection enters established state. */
static void tcp_fixup_rcvbuf(struct sock *sk)
{
u32 mss = tcp_sk(sk)->advmss;
int rcvmem;
/* 初始的rwnd一般为2倍的初始拥塞控制窗口,即20个MSS。
* 所以rcvmem是40个MSS段耗费的总内存大小,包括协议头、sk_buff和
* skb_shared_info结构体。
*/
rcvmem = 2 * SKB_TRUESIZE(mss + MAX_TCP_HEADER) *
tcp_default_init_rwnd(mss);
/* 如果让系统自动调节接收缓存的大小(默认是的) */
if (sysctl_tcp_moderate_rcvbuf)
rcvmem <<= 2; /* 增加一倍 */
/* 如果rcvmem比tcp_rmem[1]大,那么更新接收缓冲区的上限。
* rcvmem一般会比tcp_rmem[1]大。
*/
if (sk->sk_rcvbuf < rcvmem)
sk->sk_rcvbuf = min(rcvmem, syscl_tcp_rmem[2]);
}

初始的接收窗口大小,一般为2倍的初始拥塞窗口大小,即20个MSS。

[c]
/* Compute the default initial receive window, in segments. */
u32 tcp_default_init_rwnd(u32 mss)
{
/* Initial receive window should be twice of TCP_INIT_CWND to enable
 * proper sending of new unsent data during fast recovery (RFC 3517,
 * Section 4, NextSeg() rule (2)). Further place a limit when mss is larger
 * than 1460.
 */
u32 init_rwnd = TCP_INIT_CWND * 2; /* twice the initial congestion window */
if (mss > 1460)
init_rwnd = max((1460 * init_rwnd) / mss, 2U);
return init_rwnd;
}

tcp_moderate_rcvbuf让系统自动调节接收缓存的大小,默认使用。

tcp_moderate_rcvbuf - BOOLEAN

If set, TCP performs receive buffer auto-tuning, attempting to automatically

size the buffer (no greater than tcp_rmem[2]) to match the size required by

the path for full throughput. Enabled by default.

b. 调整发送缓冲区的上限sk->sk_sndbuf

调整之后的sk->sk_sndbuf不少于2倍的拥塞控制窗口(tp->snd_cwnd)。

[c]
/* Buffer size and advertised window tuning.
* Tuning sk->sk_sndbuf, when connection enters established state.
*/
static void tcp_sndbuf_expand(struct sock *sk)
{
const struct tcp_sock *tp = tcp_sk(sk);
int sndmem, per_mss;
u32 nr_segs;
/* Worst case is non GSO/TSO: each frame consumes one skb and
* skb->head is kmalloced using power of two area of memory.
*/
/* 当不使用GSO/TSO时,一个TCP负荷为MSS的段所消耗的总内存 */
per_mss = max_t(u32, tp->rx_opt.mss_clamp, tp->mss_cache) +
MAX_TCP_HEADER + SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
per_mss = roundup_pow_of_two(per_mss) +
SKB_DATA_ALIGN(sizeof(struct sk_buff));
/* 数据段的个数,取TCP_INIT_CWND、tp->snd_cwnd和
* tp->reordering + 1中的最大者。
*/
nr_segs = max_t(u32, TCP_INIT_CWND, tp->snd_cwnd);
nr_segs = max_t(u32, nr_segs, tp->reordering + 1);
/* Fast Recovery (RFC 5681 3.2):
* Cubic needs 1.7 factor, rounded to 2 to include extra cushion
* (application might react slowly to POLLOUT)
*/
sndmem = 2 * nr_segs * per_mss; /* 2倍 */
/* 如果默认的发送缓冲区上限tcp_wmem[1]小于本次计算的值sndmem,
* 那么更新sk->sk_sndbuf。由于默认值为16K,所以肯定会更新的:)
*/
if (sk->sk_sndbuf < sndmem)
sk->sk_sndbuf = min(sndmem, sysctl_tcp_wmem[2]);
}




相关内容