NGX打印日志时对特殊字符的转码,ngx特殊字符


  • 问题:
    [root@3WR ~]# curl -svo /dev/null test/ -x 127.0.0.1:9711
    日志打印结果中http_user_agent不一致

1.控制台输出结果中打印0.9.

User-Agent: curl/7.15.5 (x86_64-redhat-linux-gnu) libcurl/7.15.5 OpenSSL/0.9. zlib/1.2.3 libidn/0.6.5

2.access.log的输出结果中打印的是0.9.8\x7F

127.0.0.1 - - [01/Jul/2015:13:01:14 +0800] "GET http://localhost/ HTTP/1.1" 200 30 "-" "curl/7.15.5 (x86_64-redhat-linux-gnu) libcurl/7.15.5 OpenSSL/0.9.8\x7F zlib/1.2.3 libidn/0.6.5"


  • 原因:
    先看下ASCII码表
八进制 十六进制 十进制 字符
177 7F 127 del

原因是User-Agent中"0.9.8"后面跟着一个\x7F(DEL)字符:控制台把它解释成【删除】操作,于是本来应该显示的0.9.8被删掉一个字符,变成了0.9.;而access.log中该字符被转码成了可见的"\x7F"四个字符。
跟踪看下为什么access.log打印出了\x7F

Breakpoint 6, ngx_http_log_variable (r=0x7587e0, 
    buf=0x7436b8 "curl/7.15.5 (x86_64-redhat-linux-gnu) libcurl/7.15.5 OpenSSL/0.9.8\\x7F zlib/1.2.3 libidn/0.6.5\"\n07t", 
    op=0x740410) at src/http/modules/ngx_http_log_module.c:893
893     value = ngx_http_get_indexed_variable(r, op->data);
(gdb) p *value
/* escape = 1,内容是"0.9.8\177";\177(八进制)和\x7F(十六进制)是同一个字符,
 即DEL(删除符) */
$16 = {len = 91, valid = 1, no_cacheable = 0, not_found = 0, escape = 1, 
  data = 0x76c13c "curl/7.15.5 (x86_64-redhat-linux-gnu) libcurl/7.15.5 OpenSSL/0.9.8\177 zlib/1.2.3 libidn/0.6.5"}

(gdb) bt
#0  ngx_http_log_variable (r=Unhandled dwarf expression opcode 0xf3
) at src/http/modules/ngx_http_log_module.c:900
#1  0x0000000000447892 in ngx_http_log_handler (r=0x7587e0) at src/http/modules/ngx_http_log_module.c:331
#2  0x000000000043d35e in ngx_http_log_request (r=0x7587e0) at src/http/ngx_http_request.c:3399
#3  0x000000000043e5a7 in ngx_http_free_request (r=0x7587e0, rc=0) at src/http/ngx_http_request.c:3346
#4  0x000000000043f40b in ngx_http_set_keepalive (r=0x7587e0) at src/http/ngx_http_request.c:2789
#5  ngx_http_finalize_connection (r=0x7587e0) at src/http/ngx_http_request.c:2459
#6  0x000000000044018b in ngx_http_finalize_request (r=0x7587e0, rc=<value optimized out>)
    at src/http/ngx_http_request.c:2360
#7  0x000000000043afd9 in ngx_http_core_content_phase (r=0x7587e0, ph=Unhandled dwarf expression opcode 0xf3
) at src/http/ngx_http_core_module.c:1408
#8  0x0000000000435f93 in ngx_http_core_run_phases (r=0x7587e0) at src/http/ngx_http_core_module.c:888
#9  0x00000000004360a2 in ngx_http_handler (r=Unhandled dwarf expression opcode 0xf3
) at src/http/ngx_http_core_module.c:871
#10 0x000000000043e3bb in ngx_http_process_request (r=0x7587e0) at src/http/ngx_http_request.c:1828
#11 0x000000000044106c in ngx_http_process_request_headers (rev=Unhandled dwarf expression opcode 0xf3
) at src/http/ngx_http_request.c:1259
#12 0x00000000004415df in ngx_http_process_request_line (rev=0x7f7c35c971b0) at src/http/ngx_http_request.c:940
#13 0x0000000000441ef9 in ngx_http_wait_request_handler (rev=0x7f7c35c971b0) at src/http/ngx_http_request.c:472
#14 0x00000000004326e8 in ngx_epoll_process_events (cycle=0x73ec90, timer=Unhandled dwarf expression opcode 0xf3
) at src/event/modules/ngx_epoll_module.c:683
#15 0x0000000000429aaa in ngx_process_events_and_timers (cycle=0x73ec90) at src/event/ngx_event.c:249
#16 0x0000000000430da0 in ngx_worker_process_cycle (cycle=0x73ec90, data=Unhandled dwarf expression opcode 0xf3
) at src/os/unix/ngx_process_cycle.c:807
#17 0x000000000042f4cb in ngx_spawn_process (cycle=0x73ec90, proc=0x430cb1 <ngx_worker_process_cycle>, data=0x0, 
    name=0x4e7d83 "worker process", respawn=-4) at src/os/unix/ngx_process.c:198
#18 0x00000000004301e8 in ngx_start_worker_processes (cycle=0x73ec90, n=1, type=-4)
    at src/os/unix/ngx_process_cycle.c:362
#19 0x0000000000431d9d in ngx_master_process_cycle (cycle=0x73ec90) at src/os/unix/ngx_process_cycle.c:249
#20 0x0000000000412d71 in main (argc=Unhandled dwarf expression opcode 0xf3
) at src/core/nginx.c:412
(gdb) n
904         return (u_char *) ngx_http_log_escape(buf, value->data, value->len);  //ngx在此函数内做了转码
  • 源码面前,了无秘密
/*
 * Escape bytes for log output.
 *
 * Every byte of src whose bit is set in the escape[] bitmap below is written
 * to dst as the four characters "\xHH", where HH is the byte value in
 * upper-case hexadecimal.  All other bytes are copied unchanged.
 *
 *   dst   output buffer, or NULL to request a counting pass only
 *   src   input bytes
 *   size  number of bytes to read from src
 *
 * Returns, cast to uintptr_t:
 *   - if dst is NULL: the number of bytes in src that need escaping
 *     (nothing is written);
 *   - otherwise: the pointer just past the last byte written to dst.
 *
 * No bounds check is performed on dst; the caller must size it beforehand
 * (each escaped byte expands from 1 to 4 output bytes).
 */
static uintptr_t
ngx_http_log_escape(u_char *dst, u_char *src, size_t size)
{
    ngx_uint_t      n;
    /* hexadecimal digit lookup table */
    static u_char   hex[] = "0123456789ABCDEF";

    /*
     * 256-bit map indexed by byte value: word (c >> 5), bit (c & 0x1f).
     * A set bit means the byte must be escaped, a clear bit means it is
     * copied as is.
     */
    static uint32_t   escape[] = {
                    /* 0x00 - 0x1F: every byte in this range is escaped */
        0xffffffff, /* 1111 1111 1111 1111  1111 1111 1111 1111 */

                    /* ?>=< ;:98 7654 3210  /.-, +*)( '&%$ #"!  */
        0x00000004, /* 0000 0000 0000 0000  0000 0000 0000 0100 */

                    /* _^]\ [ZYX WVUT SRQP  ONML KJIH GFED CBA@ */
        0x10000000, /* 0001 0000 0000 0000  0000 0000 0000 0000 */

                    /*  ~}| {zyx wvut srqp  onml kjih gfed cba` */
        0x80000000, /* 1000 0000 0000 0000  0000 0000 0000 0000 */

                    /* 0x80 - 0xFF: every byte in this range is escaped */
        0xffffffff, /* 1111 1111 1111 1111  1111 1111 1111 1111 */
        0xffffffff, /* 1111 1111 1111 1111  1111 1111 1111 1111 */
        0xffffffff, /* 1111 1111 1111 1111  1111 1111 1111 1111 */
        0xffffffff, /* 1111 1111 1111 1111  1111 1111 1111 1111 */
    };


    if (dst == NULL) {

        /* find the number of the characters to be escaped */

        n = 0;

        while (size) {
            /*
             * 1U, not 1: for bytes whose low five bits are all set
             * (e.g. 0x7F) the shift count is 31, and shifting a signed
             * int 1 into the sign bit is undefined behaviour.
             */
            if (escape[*src >> 5] & (1U << (*src & 0x1f))) {
                n++;
            }
            src++;
            size--;
        }

        return (uintptr_t) n;
    }

    while (size) {
        /*
         * escape[*src >> 5] selects the 32-bit word holding this byte's
         * flag (each word covers 32 byte values, hence the divide by 32);
         * (1U << (*src & 0x1f)) selects the bit inside that word.
         * A non-zero result means the byte has to be escaped.
         */
        if (escape[*src >> 5] & (1U << (*src & 0x1f))) {
            *dst++ = '\\';
            *dst++ = 'x';
            /* one byte is 8 bits; each 4-bit half becomes one hex digit */
            /* high nibble */
            *dst++ = hex[*src >> 4];
            /* low nibble */
            *dst++ = hex[*src & 0xf];
            src++;

        } else {
            /* bytes that need no escaping are copied verbatim */
            *dst++ = *src++;
        }
        size--;
    }

    return (uintptr_t) dst;
}

相关内容

    暂无相关文章