[MySQL Patch] Binlog文件预分配
[MySQL Patch] Binlog文件预分配
之前已经介绍过实现思路(见 )。Binlog预分配在两年前已由Yoshinori Matsunobu在5.1里实现,但它存在一个问题:在非xfs文件系统上,预分配文件时会一直持有大锁,可能导致tps长时间为0。这里转换思路,使用一个daemon plugin在后台做文件预分配,当binlog切换时,直接将预分配好的文件(命名为mysql-binlog.PA) rename成新的binlog文件。
一组简单的测试数据,之前我在percona-discussion邮件组上发过,这里直接拿过来了...
my test: (using mysqlslap, ext3 filesystem)
set sync_binlog = 1
create a simple table:
create table xxx (a int auto_increment, b int, c varchar(50), primary key(a))
mysqlslap --no-defaults -uxx --create-schema=test --number-of-queries=2000000 --concurrency=$i -S $sock --query="insert into xxx values (NULL,2, 'sadasda')"
threads | with binlog-prealloc | Original version
10 | 152s | 455s
30 | 104s | 216s
50 | 97s | 164s
70 | 98s | 143s
90 | 98s | 132s
110 | 101s | 127s
130 | 102s | 124s
150 | 104s | 123s
以下Patch基于Percona Server5.5.24,目前处于测试中,比较简陋...
cmake时需要增加选项 -DWITH_BINLOG_PREALLOC=ON
然后 install plugin:
set global binlog_prealloc = 1;
install plugin binlog_prealloc soname 'libbinlog_prealloc.so';
flush logs;
- Index: a/CMakeLists.txt
- ===================================================================
- --- a.orig/CMakeLists.txt
- +++ a/CMakeLists.txt
- @@ -161,6 +161,11 @@ INCLUDE(install_layout)
- INCLUDE(mysql_add_executable)
- # Handle options
- +OPTION(WITH_BINLOG_PREALLOC "if allow binlog file prealloced" OFF) # build-time switch, off by default
- +IF(WITH_BINLOG_PREALLOC)
- +ADD_DEFINITIONS(-DWITH_BINLOG_PREALLOC) # exposes the switch to the C/C++ sources as a preprocessor macro
- +ENDIF()
- +
- OPTION(DISABLE_SHARED
- "Don't build shared libraries, compile code as position-dependent" OFF)
- IF(DISABLE_SHARED)
- Index: a/include/my_global.h
- ===================================================================
- --- a.orig/include/my_global.h
- +++ a/include/my_global.h
- @@ -1501,4 +1501,8 @@ static inline double rint(double x)
- #endif /* EMBEDDED_LIBRARY */
- +#if defined (HAVE_POSIX_FALLOCATE) && defined(WITH_BINLOG_PREALLOC) /* feature needs both posix_fallocate() support and the WITH_BINLOG_PREALLOC build option */
- +#define BINLOG_PREALLOC /* single guard macro tested by the server sources of this patch */
- +#endif
- +
- #endif /* my_global_h */
- Index: a/plugin/daemon_example/CMakeLists.txt
- ===================================================================
- --- a.orig/plugin/daemon_example/CMakeLists.txt
- +++ a/plugin/daemon_example/CMakeLists.txt
- @@ -17,3 +17,6 @@ MYSQL_ADD_PLUGIN(daemon_example daemon_e
- MODULE_ONLY MODULE_OUTPUT_NAME "libdaemon_example")
- INSTALL(FILES daemon_example.ini DESTINATION ${INSTALL_PLUGINDIR})
- +
- +MYSQL_ADD_PLUGIN(binlog_prealloc binlog_prealloc.cc
- + MODULE_ONLY MODULE_OUTPUT_NAME "libbinlog_prealloc") # output name matches the 'libbinlog_prealloc.so' used with INSTALL PLUGIN
- Index: a/plugin/daemon_example/binlog_prealloc.cc
- ===================================================================
- --- /dev/null
- +++ a/plugin/daemon_example/binlog_prealloc.cc
- @@ -0,0 +1,111 @@
- +#ifndef MYSQL_SERVER
- +#define MYSQL_SERVER
- +#endif
- +
- +#include <string.h>
- +#include <mysql/plugin.h>
- +#include <mysql_version.h>
- +#include "my_global.h"
- +#include <my_sys.h>
- +#include <sys/time.h>
- +#include "log.h"
- +
- +#if !defined(__attribute__) && (defined(__cplusplus) || !defined(__GNUC__) || __GNUC__ == 2 && __GNUC_MINOR__ < 8)
- +#define __attribute__(A)
- +#endif
- +
- +/*
- + * bin_prealloc_thread (declared first) is this plugin's own worker-thread
- + * handle. The extern symbols that follow are shared with the server: the
- + * sql/log.cc part of this patch defines binlog_prealloc_inited,
- + * binlog_prealloc, use_plugin_prealloc, has_prealloc_next,
- + * binlog_prealloc_mutex, binlog_prealloc_cond, prealloc_file and
- + * create_prealloc_file().
- + * NOTE(review): max_binlog_size is not defined in the log.cc hunk shown with
- + * this patch - confirm where it is defined.
- + */
- +static pthread_t bin_prealloc_thread;
- +extern unsigned long max_binlog_size;
- +extern my_bool binlog_prealloc_inited;
- +extern ulong binlog_prealloc ;
- +extern my_bool use_plugin_prealloc;
- +extern my_bool has_prealloc_next;
- +extern pthread_mutex_t binlog_prealloc_mutex;
- +extern pthread_cond_t binlog_prealloc_cond;
- +extern char prealloc_file[FN_REFLEN];
- +extern int create_prealloc_file(char *filename);
- +
- +/*
- +  Cancellation cleanup handler: unlocks the mutex passed in `arg`.
- +
- +  It is registered while bin_prealloc_func() holds binlog_prealloc_mutex, so
- +  that a pthread_cancel() delivered inside the critical section (for example
- +  while blocked in pthread_cond_wait(), which re-acquires the mutex before
- +  the thread is torn down) cannot leave the mutex locked forever.
- +*/
- +static void bin_prealloc_unlock(void *arg)
- +{
- +  pthread_mutex_unlock((pthread_mutex_t *) arg);
- +}
- +
- +/*
- +  Worker thread of the binlog_prealloc plugin.
- +
- +  Loops forever (the thread is stopped with pthread_cancel() from
- +  bin_prealloc_deinit()). While the feature is switched off
- +  (binlog_prealloc == 0) or the server has not initialised the shared state
- +  yet, it polls every 2 seconds. Otherwise it waits until the previously
- +  preallocated file has been consumed (has_prealloc_next == FALSE), creates
- +  the next one with create_prealloc_file(prealloc_file) and flags it as
- +  available.
- +
- +  @param p  unused thread argument
- +  @return   never returns normally; the trailing `return 0` only satisfies
- +            the thread-function signature
- +*/
- +pthread_handler_t bin_prealloc_func(void *p)
- +{
- +  int ret;
- +
- +  while (1) {
- +    if (binlog_prealloc == 0 || !binlog_prealloc_inited) {
- +      sleep(2);
- +      continue;
- +    }
- +
- +    pthread_mutex_lock(&binlog_prealloc_mutex);
- +    /* From here until the matching pop, cancellation must release the mutex. */
- +    pthread_cleanup_push(bin_prealloc_unlock, &binlog_prealloc_mutex);
- +
- +    /* Re-test the predicate in a loop: pthread_cond_wait() may wake up
- +       spuriously while the previous file is still waiting to be consumed. */
- +    while (has_prealloc_next)
- +      pthread_cond_wait(&binlog_prealloc_cond, &binlog_prealloc_mutex);
- +
- +    ret = create_prealloc_file(prealloc_file);
- +
- +    /* Tell the server that the plugin thread is now in charge of
- +       preallocation. */
- +    use_plugin_prealloc = TRUE;
- +
- +    /* The flag is raised even when the allocation failed, as before: the
- +       consumer re-validates the file with access() and rename() and falls
- +       back to a non-preallocated binlog when that fails. */
- +    has_prealloc_next = TRUE;
- +
- +    /* Pop and run the handler, i.e. unlock the mutex. */
- +    pthread_cleanup_pop(1);
- +
- +    if (ret != 0)
- +      fprintf(stderr, "Plugin 'bin_prealloc': "
- +              "Could not preallocate file %s\n", prealloc_file);
- +  }
- +
- +  return 0;
- +}
- +
- +
- +/*
- +  Plugin init hook: starts the joinable worker thread bin_prealloc_func().
- +
- +  use_plugin_prealloc is reset to FALSE first; the worker sets it to TRUE
- +  once it has run its first allocation.
- +
- +  @param p  unused plugin handle
- +  @return   0 on success, 1 if the thread could not be created
- +*/
- +static int bin_prealloc_init(void *p)
- +{
- +  pthread_attr_t attr;
- +  int rc;
- +
- +  pthread_attr_init(&attr);
- +  pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
- +
- +  use_plugin_prealloc = FALSE;
- +
- +  rc = pthread_create(&bin_prealloc_thread, &attr, bin_prealloc_func, NULL);
- +
- +  /* The attribute object is only needed for pthread_create(); release it on
- +     both the success and the failure path. */
- +  pthread_attr_destroy(&attr);
- +
- +  if (rc != 0) {
- +    fprintf(stderr, "Plugin 'bin_prealloc': "
- +            "Could not create bin_prealloc thread!\n");
- +    return 1;
- +  }
- +
- +  return 0;
- +}
- +
- +/*
- + * Plugin deinit hook: cancels the worker thread and waits for it to exit,
- + * then clears has_prealloc_next and use_plugin_prealloc so the server stops
- + * taking the plugin (rename) path in prealloc_binlog_with_newname().
- + * The flags are cleared only after the join, i.e. once the worker can no
- + * longer write them. Always returns 0.
- + * NOTE(review): the flags are written here without taking
- + * binlog_prealloc_mutex - confirm this is safe against a concurrent binlog
- + * rotation.
- + */
- +static int bin_prealloc_deinit(void *p)
- +{
- + pthread_cancel(bin_prealloc_thread);
- + pthread_join(bin_prealloc_thread, NULL);
- +
- + has_prealloc_next = FALSE;
- + use_plugin_prealloc = FALSE;
- + return 0;
- +}
- +
- +/*
- + * Plugin descriptor for the daemon plugin. The name string below,
- + * "binlog_prealloc", is the one used in the INSTALL PLUGIN statement.
- + * NOTE(review): the initializer is positional; the per-field notes below
- + * assume the usual st_mysql_plugin layout - confirm against mysql/plugin.h.
- + */
- +struct st_mysql_daemon bin_prealloc = { MYSQL_DAEMON_INTERFACE_VERSION };
- +
- +mysql_declare_plugin(bin_prealloc)
- +{
- + MYSQL_DAEMON_PLUGIN, /* plugin type */
- + &bin_prealloc, /* type-specific descriptor declared above */
- + "binlog_prealloc", /* plugin name */
- + "yinfeng.zwx@taobao.com", /* author */
- + "a daemon plugin to prealloc binlog file", /* description */
- + PLUGIN_LICENSE_GPL,
- + bin_prealloc_init, /* starts the worker thread */
- + bin_prealloc_deinit, /* cancels and joins the worker thread */
- + 0x0100, /* presumably version 1.0 - confirm encoding */
- + NULL,
- + NULL,
- + NULL
- +}
- +mysql_declare_plugin_end;
- +
- Index: a/sql/log_event.cc
- ===================================================================
- --- a.orig/sql/log_event.cc
- +++ a/sql/log_event.cc
- @@ -65,6 +65,10 @@
- */
- #define FMT_G_BUFSIZE(PREC) (3 + (PREC) + 5 + 1)
- +#ifdef BINLOG_PREALLOC
- +/* Both symbols are defined in sql/log.cc. use_binlog_prealloc is a my_bool
- +   there; declaring it with a wider type here would read past the object and
- +   pick up the bytes of whatever globals happen to follow it. */
- +extern ulonglong fetch_active_size(void);
- +extern my_bool use_binlog_prealloc;
- +#endif
- #if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION)
- static int rows_event_stmt_cleanup(Relay_log_info const *rli, THD* thd);
- @@ -1018,7 +1022,14 @@ int Log_event::read_log_event(IO_CACHE*
- int result=0;
- char buf[LOG_EVENT_MINIMAL_HEADER_LEN];
- DBUG_ENTER("Log_event::read_log_event");
- -
- +#ifdef BINLOG_PREALLOC
- + if (use_binlog_prealloc && file->file_name && file->type == READ_CACHE) {
- + if (mysql_bin_log.is_active(file->file_name))
- + file->end_of_file= fetch_active_size();
- + else
- + file->end_of_file= ~(my_off_t) 0;
- + }
- +#endif
- if (log_lock)
- mysql_mutex_lock(log_lock);
- if (my_b_read(file, (uchar*) buf, sizeof(buf)))
- Index: a/sql/log.cc
- ===================================================================
- --- a.orig/sql/log.cc
- +++ a/sql/log.cc
- @@ -90,6 +90,112 @@ static SHOW_VAR binlog_status_vars_detai
- {NullS, NullS, SHOW_LONG}
- };
- +#ifdef BINLOG_PREALLOC
- +/* Shared state of the binlog preallocation feature. The plugin in
- +   plugin/daemon_example/binlog_prealloc.cc accesses part of it via extern. */
- +ulonglong active_binlog_size = 0; /* written by set_active_size(), read by fetch_active_size() */
- +ulong binlog_prealloc = 0; /* backing store of the binlog_prealloc system variable; 0 disables the feature */
- +
- +my_bool use_binlog_prealloc = FALSE; /* set TRUE by prealloc_binlog_with_newname() on success */
- +my_bool binlog_prealloc_inited = FALSE; /* set TRUE at the end of init_binlog_prealloc() */
- +my_bool use_plugin_prealloc = FALSE; /* TRUE selects the rename path in prealloc_binlog_with_newname(); set by the plugin thread */
- +my_bool has_prealloc_next = FALSE; /* TRUE while a file produced by the plugin thread is waiting to be consumed */
- +
- +pthread_mutex_t binlog_prealloc_mutex; /* initialised in init_binlog_prealloc(); held around the has_prealloc_next hand-off */
- +pthread_cond_t binlog_prealloc_cond; /* broadcast after has_prealloc_next is cleared by the consumer */
- +
- +char prealloc_file[FN_REFLEN]; /* path of the preallocated file, built in init_binlog_prealloc() */
- +
- +/*
- +  One-time setup of the shared preallocation state.
- +
- +  Resets the hand-off flags, builds the path of the preallocated file in
- +  prealloc_file (the formatted `name` plus the hard-coded suffix ".PA"),
- +  initialises the mutex and condition variable, and finally sets
- +  binlog_prealloc_inited, which the plugin thread polls.
- +
- +  @param name  binlog name the preallocated file name is derived from
- +*/
- +static void init_binlog_prealloc(const char * name)
- +{
- +  static const char suffix[] = ".PA";
- +  size_t length;
- +
- +  use_plugin_prealloc = FALSE;
- +  has_prealloc_next = FALSE;
- +
- +  bzero(prealloc_file, FN_REFLEN);
- +
- +  /* NOTE(review): flag value 4 is passed through unchanged from the original
- +     patch - confirm which fn_format() flag it stands for. */
- +  fn_format(prealloc_file, name, mysql_data_home, "", 4);
- +
- +  /* Keep room for the suffix and its terminating NUL: a path that already
- +     fills the buffer is shortened instead of being written past
- +     prealloc_file[FN_REFLEN - 1]. */
- +  length = strlen(prealloc_file);
- +  if (length > FN_REFLEN - sizeof(suffix))
- +    length = FN_REFLEN - sizeof(suffix);
- +  memcpy(prealloc_file + length, suffix, sizeof(suffix));
- +
- +  pthread_mutex_init(&(binlog_prealloc_mutex), NULL);
- +  pthread_cond_init(&(binlog_prealloc_cond), NULL);
- +
- +  /* Set last: the plugin thread starts using the state above once it sees
- +     this flag. */
- +  binlog_prealloc_inited = TRUE;
- +}
- +
- +/*
- +  Atomically replace active_binlog_size with new_val.
- +
- +  The compare-and-swap is retried until it succeeds. A single CAS whose
- +  expected value comes from a separate plain read silently drops the update
- +  whenever another thread changes the variable in between.
- +
- +  @param new_val  value to store
- +  @return         the value that was replaced
- +*/
- +ulonglong set_active_size(ulonglong new_val)
- +{
- +  ulonglong old_val;
- +
- +  do {
- +    old_val = active_binlog_size;
- +  } while (!__sync_bool_compare_and_swap(&active_binlog_size,
- +                                         old_val, new_val));
- +
- +  return old_val;
- +}
- +
- +
- +/*
- +  Atomically read active_binlog_size.
- +
- +  Implemented as an atomic add of zero, which leaves the variable unchanged
- +  and yields its current value.
- +
- +  @return  current value of active_binlog_size
- +*/
- +ulonglong fetch_active_size(void)
- +{
- +  ulonglong current = __sync_fetch_and_add(&active_binlog_size, 0);
- +  return current;
- +}
- +
- +/*
- +  Create (or reopen) file_name and reserve max_binlog_size bytes for it with
- +  posix_fallocate(), then sync it to disk.
- +
- +  The file is created owner read/write; the caller applies the final mode
- +  with chmod(). Creating it with mode 0 would make a file left behind by an
- +  earlier run impossible to reopen for a non-root owner.
- +
- +  If reserving or syncing fails, the file is removed again so that no
- +  partially allocated file is left where a later rename or open could pick
- +  it up.
- +
- +  @param file_name  path of the file to preallocate
- +  @return           0 on success, -1 if the file could not be opened,
- +                    1 if reserving the space or syncing failed
- +*/
- +int create_prealloc_file(char *file_name)
- +{
- +  int fd;
- +  int ret;
- +
- +  fd = open(file_name, O_CREAT | O_RDWR, 0600);
- +  if (fd == -1)
- +    return -1;
- +
- +  ret = posix_fallocate(fd, 0, max_binlog_size)
- +        || my_sync(fd, MYF(MY_WME));
- +
- +  close(fd);
- +
- +  if (ret != 0)
- +    unlink(file_name);
- +
- +  return ret;
- +}
- +
- +
- +/*
- +  Provide a preallocated file under the name of the next binlog.
- +
- +  Without the plugin thread (use_plugin_prealloc == FALSE) the file is
- +  allocated inline with create_prealloc_file(). With the plugin thread, the
- +  file it prepared (prealloc_file) is renamed to new_name; if the mutex is
- +  busy or no prepared file can be renamed, the function gives up and the
- +  binlog is created without preallocation.
- +
- +  On success the file mode is set to 0666 (the same value as decimal 438)
- +  and use_binlog_prealloc is set to TRUE.
- +
- +  @param new_name  path of the binlog file about to be opened
- +  @return          0 on success, -1 if no preallocated file was provided
- +*/
- +int prealloc_binlog_with_newname(char *new_name)
- +{
- +  if (!use_plugin_prealloc) {
- +    /* No plugin thread available: allocate the new binlog right here. */
- +    if (create_prealloc_file(new_name) != 0) {
- +      fprintf(stderr, "Prealloc Binlog Failed:%s\n", new_name);
- +      return -1;
- +    }
- +  } else {
- +    my_bool renamed;
- +
- +    /* Never block the rotation on the plugin thread: if the mutex is busy,
- +       fall back to a binlog without preallocation. */
- +    if (pthread_mutex_trylock(&binlog_prealloc_mutex) != 0)
- +      return -1;
- +
- +    /* access() mode 0 only tests that the file exists. */
- +    renamed = (has_prealloc_next &&
- +               access(prealloc_file, 0) == 0 &&
- +               rename(prealloc_file, new_name) == 0) ? TRUE : FALSE;
- +
- +    /* Consumed or unusable: either way ask the plugin for a new file. */
- +    has_prealloc_next = FALSE;
- +
- +    pthread_mutex_unlock(&binlog_prealloc_mutex);
- +    pthread_cond_broadcast(&binlog_prealloc_cond);
- +
- +    if (!renamed)
- +      return -1;
- +  }
- +
- +  chmod(new_name, 0666);
- +  use_binlog_prealloc = TRUE;
- +  return 0;
- +}
- +
- +#endif
- /**
- purge logs, master and slave sides both, related error code
- convertor.
- @@ -2122,6 +2228,12 @@ File open_binlog(IO_CACHE *log, const ch
- *errmsg = "Could not open log file";
- goto err;
- }
- +#ifdef BINLOG_PREALLOC
- + if (use_binlog_prealloc) {
- + log->file_name=(char *)log_file_name;
- + log->end_of_file= fetch_active_size();
- + }
- +#endif
- if (check_binlog_magic(log,errmsg))
- goto err;
- DBUG_RETURN(file);
- @@ -3182,7 +3294,9 @@ bool MYSQL_BIN_LOG::open(const char *log
- if (flush_io_cache(&log_file) ||
- mysql_file_sync(log_file.file, MYF(MY_WME)))
- goto err;
- -
- +#ifdef BINLOG_PREALLOC
- + set_active_size(log_file.pos_in_file);
- +#endif
- if (write_file_name_to_index_file)
- {
- #ifdef HAVE_REPLICATION
- @@ -4378,7 +4492,16 @@ int MYSQL_BIN_LOG::new_file_impl(bool ne
- old_name=name;
- name=0; // Don't free name
- close(LOG_CLOSE_TO_BE_OPENED | LOG_CLOSE_INDEX);
- -
- +#ifdef BINLOG_PREALLOC
- + /*try to pre-alloc binlog file,we don't care if this will fail*/
- + if (!is_relay_log && binlog_prealloc) {
- + if (unlikely(!binlog_prealloc_inited)) {
- + init_binlog_prealloc(old_name);
- + }
- + prealloc_binlog_with_newname(new_name_ptr);
- + } else
- + use_binlog_prealloc = FALSE;
- +#endif
- /*
- Note that at this point, log_state != LOG_CLOSED (important for is_open()).
- */
- @@ -5156,6 +5279,9 @@ err:
- else
- {
- bool check_purge;
- +#ifdef BINLOG_PREALLOC
- + set_active_size(event_info->log_pos);
- +#endif
- signal_update();
- error= rotate(false, &check_purge);
- mysql_mutex_unlock(&LOCK_log);
- @@ -5540,6 +5666,9 @@ bool MYSQL_BIN_LOG::write_incident(THD *
- if (!error && !(error= flush_and_sync(0)))
- {
- bool check_purge= false;
- +#ifdef BINLOG_PREALLOC
- + set_active_size(ev.log_pos);
- +#endif
- signal_update();
- error= rotate(false, &check_purge);
- mysql_mutex_unlock(&LOCK_log);
- @@ -5695,6 +5824,9 @@ void
- MYSQL_BIN_LOG::trx_group_commit_leader(group_commit_entry *leader)
- {
- DBUG_ENTER("MYSQL_BIN_LOG::trx_group_commit_leader");
- +#ifdef BINLOG_PREALLOC
- + ulonglong last_actual_pos = 0;
- +#endif
- uint xid_count= 0;
- uint write_count= 0;
- bool check_purge= false;
- @@ -5753,6 +5885,9 @@ MYSQL_BIN_LOG::trx_group_commit_leader(g
- }
- cache_data->commit_bin_log_file_pos= my_b_write_tell(&log_file);
- +#ifdef BINLOG_PREALLOC
- + last_actual_pos = cache_data->commit_bin_log_file_pos;
- +#endif
- if (cache_data->using_xa && cache_data->xa_xid)
- xid_count++;
- }
- @@ -5773,6 +5908,9 @@ MYSQL_BIN_LOG::trx_group_commit_leader(g
- }
- else
- {
- +#ifdef BINLOG_PREALLOC
- + set_active_size(last_actual_pos);
- +#endif
- signal_update();
- }
- @@ -6005,6 +6143,18 @@ void MYSQL_BIN_LOG::close(uint exiting)
- original position on system that doesn't support pwrite().
- */
- mysql_file_seek(log_file.file, org_position, MY_SEEK_SET, MYF(0));
- +#ifdef BINLOG_PREALLOC
- + end_io_cache(&log_file);
- + DBUG_ASSERT(is_active(log_file_name));
- + mysql_mutex_assert_owner(&LOCK_log);
- + set_active_size(log_file.pos_in_file);
- + if (use_binlog_prealloc && my_chsize(log_file.file,
- + log_file.pos_in_file, 0, MYF(MY_WME)))
- + {
- + write_error= 1;
- + sql_print_error(ER(ER_ERROR_ON_WRITE), name, errno);
- + }
- +#endif
- }
- /* this will cleanup IO_CACHE, sync and close the file */
- Index: a/sql/sys_vars.cc
- ===================================================================
- --- a.orig/sql/sys_vars.cc
- +++ a/sql/sys_vars.cc
- @@ -3330,6 +3330,12 @@ static Sys_var_uint Sys_slave_net_timeou
- VALID_RANGE(1, LONG_TIMEOUT), DEFAULT(SLAVE_NET_TIMEOUT), BLOCK_SIZE(1),
- NO_MUTEX_GUARD, NOT_IN_BINLOG, ON_CHECK(0),
- ON_UPDATE(fix_slave_net_timeout));
- +#ifdef BINLOG_PREALLOC /* System variable binlog_prealloc: global only, range 0..100, default 0,
- +   stored in the global `binlog_prealloc`. The code in this patch only
- +   distinguishes zero (feature off) from non-zero (feature on). */
- +static Sys_var_ulong Sys_binlog_prealloc(
- + "binlog_prealloc", "default 0 , if binlog_prealloc >0, means prealloc binlog file",
- + GLOBAL_VAR(binlog_prealloc), CMD_LINE(REQUIRED_ARG),
- + VALID_RANGE(0,100), DEFAULT(0), BLOCK_SIZE(1));
- +#endif
- static bool check_slave_skip_counter(sys_var *self, THD *thd, set_var *var)
- {
- Index: a/sql/mysqld.h
- ===================================================================
- --- a.orig/sql/mysqld.h
- +++ a/sql/mysqld.h
- @@ -238,7 +238,9 @@ extern I_List<THD> threads;
- extern char err_shared_dir[];
- extern TYPELIB thread_handling_typelib;
- extern my_decimal decimal_zero;
- -
- +#ifdef BINLOG_PREALLOC
- +extern ulong binlog_prealloc;
- +#endif
- extern ulonglong opt_log_warnings_suppress;
- extern char* enforce_storage_engine;
评论暂时关闭