percona-discussion team mailing list archive
-
percona-discussion team
-
Mailing list archive
-
Message #00435
[Bug 354817] [NEW] --apply-log stops at 99%
Public bug reported:
On xtrabackup 0.4 rpm for RHEL5, when running innobackupex --apply-log
--use-memory=16000, it gets to 99% and then just hangs. I left it there
for a good 12 hours and when I came back it hadn't budged ...
xtrabackup is sitting in the processlist pegged at 100% CPU. Here's an
strace starting from the last output of apply-log:
[pid 6765] write(2, "InnoDB: Doing recovery: scanned "..., 79InnoDB: Doing recovery: scanned up to log sequence number 80 1103001814 (99 %)
) = 79
[pid 6765] pread(8, "( \337=\0\326\0002\0\6TM\300\366\24\0\2\0\2\200\4\200\10\21\6\2\235\16\0\201v$"..., 512, 520458240) = 512
[pid 6765] futex(0x2082cd2c, FUTEX_CMP_REQUEUE, 1, 2147483647, 0x2082ccf0, 0 <unfinished ...>
[pid 6768] <... futex resumed> ) = 0
[pid 6768] futex(0x2082ccf0, FUTEX_WAIT, 2, NULL <unfinished ...>
[pid 6765] <... futex resumed> ) = 1
[pid 6765] futex(0x2082ccf0, FUTEX_WAKE, 1 <unfinished ...>
[pid 6768] <... futex resumed> ) = 0
[pid 6768] futex(0x2082ccf0, FUTEX_WAKE, 1) = 0
[pid 6768] pwrite(8, "\0\0\0\0\0\6S\247\0\0\0P\"\270\361>\0\0\t>\1\0\0\0\377\377\377\377\377\377\377\377"..., 512, 1536) = 512
[pid 6768] fsync(8 <unfinished ...>
[pid 6765] <... futex resumed> ) = 1
[pid 6765] sched_yield() = 0
[pid 6765] futex(0x3dd0192c, FUTEX_WAIT, 1, NULL <unfinished ...>
[pid 6768] <... fsync resumed> ) = 0
[pid 6768] futex(0x3dd0192c, FUTEX_CMP_REQUEUE, 1, 2147483647, 0x3dd018f0, 0) = 1
[pid 6768] futex(0x2082cd2c, FUTEX_WAIT, 3, NULL <unfinished ...>
[pid 6765] <... futex resumed> ) = 0
[pid 6765] futex(0x3dd018f0, FUTEX_WAKE, 1) = 0
[pid 6765] mmap(NULL, 36962304, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x2aaf64373000
[pid 6765] mmap(NULL, 36962304, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x2aaf666b3000
[pid 6765] mmap(NULL, 147841024, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x2aaf689f3000
[pid 6765] open("/tmp/ibI2XSSt", O_RDWR|O_CREAT|O_EXCL, 0600) = 9
[pid 6765] unlink("/tmp/ibI2XSSt") = 0
[pid 6765] dup(9) = 10
[pid 6765] close(9) = 0
[pid 6765] fcntl(10, F_GETFL) = 0x8002 (flags O_RDWR|O_LARGEFILE)
[pid 6765] fstat(10, {st_mode=S_IFREG|0600, st_size=0, ...}) = 0
[pid 6765] mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x2aaf716f1000
[pid 6765] lseek(10, 0, SEEK_CUR) = 0
[pid 6765] pread(3, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 16384, 114688) = 16384
[pid 6765] pread(3, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 16384, 16384) = 16384
[pid 6765] pread(3, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 16384, 49152) = 16384
[pid 6765] pread(3, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 16384, 0) = 16384
[pid 6765] pread(3, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 16384, 65536) = 16384
At this point it just sits there. I've taken a 2nd backup from a 2nd
DB server and have the same results. Backups were taken via tar stream
over ssh.
** Affects: percona-xtrabackup
Importance: Undecided
Status: New
--
--apply-log stops at 99%
https://bugs.launchpad.net/bugs/354817
You received this bug notification because you are a member of Percona
developers, which is the registrant for Percona-XtraBackup.
Status in Open source backup tool for InnoDB and XtraDB: New
Bug description:
On xtrabackup 0.4 rpm for RHEL5, when running innobackupex --apply-log --use-memory=16000, it gets to 99% and then just hangs. I left it there for a good 12 hours and when I came back it hadn't budged ... xtrabackup is sitting in the processlist pegged at 100% CPU. Here's an strace starting from the last output of apply-log:
[pid 6765] write(2, "InnoDB: Doing recovery: scanned "..., 79InnoDB: Doing recovery: scanned up to log sequence number 80 1103001814 (99 %)
) = 79
[pid 6765] pread(8, "( \337=\0\326\0002\0\6TM\300\366\24\0\2\0\2\200\4\200\10\21\6\2\235\16\0\201v$"..., 512, 520458240) = 512
[pid 6765] futex(0x2082cd2c, FUTEX_CMP_REQUEUE, 1, 2147483647, 0x2082ccf0, 0 <unfinished ...>
[pid 6768] <... futex resumed> ) = 0
[pid 6768] futex(0x2082ccf0, FUTEX_WAIT, 2, NULL <unfinished ...>
[pid 6765] <... futex resumed> ) = 1
[pid 6765] futex(0x2082ccf0, FUTEX_WAKE, 1 <unfinished ...>
[pid 6768] <... futex resumed> ) = 0
[pid 6768] futex(0x2082ccf0, FUTEX_WAKE, 1) = 0
[pid 6768] pwrite(8, "\0\0\0\0\0\6S\247\0\0\0P\"\270\361>\0\0\t>\1\0\0\0\377\377\377\377\377\377\377\377"..., 512, 1536) = 512
[pid 6768] fsync(8 <unfinished ...>
[pid 6765] <... futex resumed> ) = 1
[pid 6765] sched_yield() = 0
[pid 6765] futex(0x3dd0192c, FUTEX_WAIT, 1, NULL <unfinished ...>
[pid 6768] <... fsync resumed> ) = 0
[pid 6768] futex(0x3dd0192c, FUTEX_CMP_REQUEUE, 1, 2147483647, 0x3dd018f0, 0) = 1
[pid 6768] futex(0x2082cd2c, FUTEX_WAIT, 3, NULL <unfinished ...>
[pid 6765] <... futex resumed> ) = 0
[pid 6765] futex(0x3dd018f0, FUTEX_WAKE, 1) = 0
[pid 6765] mmap(NULL, 36962304, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x2aaf64373000
[pid 6765] mmap(NULL, 36962304, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x2aaf666b3000
[pid 6765] mmap(NULL, 147841024, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x2aaf689f3000
[pid 6765] open("/tmp/ibI2XSSt", O_RDWR|O_CREAT|O_EXCL, 0600) = 9
[pid 6765] unlink("/tmp/ibI2XSSt") = 0
[pid 6765] dup(9) = 10
[pid 6765] close(9) = 0
[pid 6765] fcntl(10, F_GETFL) = 0x8002 (flags O_RDWR|O_LARGEFILE)
[pid 6765] fstat(10, {st_mode=S_IFREG|0600, st_size=0, ...}) = 0
[pid 6765] mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x2aaf716f1000
[pid 6765] lseek(10, 0, SEEK_CUR) = 0
[pid 6765] pread(3, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 16384, 114688) = 16384
[pid 6765] pread(3, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 16384, 16384) = 16384
[pid 6765] pread(3, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 16384, 49152) = 16384
[pid 6765] pread(3, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 16384, 0) = 16384
[pid 6765] pread(3, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 16384, 65536) = 16384
At this point it just sits there. I've taken a 2nd backup from a 2nd DB server and have the same results. Backups were taken via tar stream over ssh.
Follow ups
References