首页
登录 | 注册

主库的wal日志已经被归档或异常丢失如何搭建从库


关键字:wal日志归档  搭建从库 restore_command
master194.1
slave194.4
wal归档目录/backup/pgsql/pg_arch/
xlog目录/var/lib/pgsql/9.5/data/pg_xlog
 
我们知道pg的从库的搭建是通过基础备份和完整的wal日志完成的,那么如果主库的wal日志已经被归档或异常丢失,如果通过备份进行搭建从库呢?
我们可以通过restore_command命令把wal归档拷贝回xlog目录完成。具体的操作如下:

一、模拟xlog目录的wal日志丢失或已经被归档,0000000800000000000000D8该wal日志丢失
mater
wal归档目录
[root@db-hx-194-1 1941]# pwd
/backup/pgsql/pg_arch/1941
[root@db-hx-194-1 1941]# ll 0000000800000000000000D8
-rw------- 1 postgres postgres 16777216 Mar 14 12:18 0000000800000000000000D8

xlog目录
cd /var/lib/pgsql/9.5/data/pg_xlog
-rw------- 1 postgres postgres 16777216 Mar 14 12:09 0000000800000000000000D7
-rw------- 1 postgres postgres      316 Mar 14 12:09 0000000800000000000000D7.00000028.backup
-rw------- 1 postgres postgres 16777216 Mar 14 12:18 0000000800000000000000D8
drwx------ 2 postgres postgres     4096 Mar 14 12:18 archive_status
-rw------- 1 postgres postgres 16777216 Mar 14 12:18 0000000800000000000000D9
$ mv 0000000800000000000000D8 /tmp/
\

在从库的归档目录下准备好
slave

方法一: 推荐
#/bin/mount -t nfs 10.40.194.1:/backup  /backup3
# df
Filesystem                   1K-blocks      Used  Available Use% Mounted on
/dev/sdb1                   5858145720  11671396 5553476080   1% /backup
/dev/mapper/vg_root-lv_opt   178956928 139412400   39544528  78% /opt
/dev/fioa                   1220670356     32992 1220637364   1% /database
10.40.194.1:/backup         5858146304  18405376 5546742784   1% /backup3

方法二:
cp /backup3/pgsql/pg_arch/1941/*   /backup/pgsql/pg_arch/1941/

[root@oracle-hx-194-4 pg_xlog]# cd /backup/pgsql/pg_arch/1941/
[root@oracle-hx-194-4 1941]# ll
total 131080
-rw------- 1 postgres postgres 16777216 Mar 14 13:40 0000000800000000000000D1
-rw------- 1 postgres postgres 16777216 Mar 14 13:40 0000000800000000000000D2
-rw------- 1 postgres postgres 16777216 Mar 14 13:40 0000000800000000000000D3
-rw------- 1 postgres postgres 16777216 Mar 14 13:40 0000000800000000000000D4
-rw------- 1 postgres postgres 16777216 Mar 14 13:40 0000000800000000000000D5
-rw------- 1 postgres postgres      316 Mar 14 13:40 0000000800000000000000D5.00000028.backup
-rw------- 1 postgres postgres 16777216 Mar 14 13:40 0000000800000000000000D6
-rw------- 1 postgres postgres 16777216 Mar 14 13:40 0000000800000000000000D7
-rw------- 1 postgres postgres      316 Mar 14 13:40 0000000800000000000000D7.00000028.backup
-rw------- 1 postgres postgres 16777216 Mar 14 13:40 0000000800000000000000D8
 

二、搭建基础备份
master
/opt/pgdata/digoal/1921/data04/pg93backup/20180313/data
scp -rp * root@10.40.194.4:/opt/bohai/pgsql9.5/data

slave
chown -R postgres.postgres /opt/bohai/pgsql9.5/data
cd /opt/bohai/pgsql9.5/data/pg_xlog
 rm -rf *
/opt/bohai/pgsql9.5/data/pg_tblspc
mv 41002/* /opt/pgsql-9.5/tbs
rm -rf 41002
ln -s /opt/pgsql-9.5/tbs ./41002
mv 41037/* /opt/pgsql-9.5/bi_erp_itf_tbs
rm -rf 41037
ln -s /opt/pgsql-9.5/bi_erp_itf_tbs ./41037

三、恢复配置准备

方法一: 推荐
$ vi $PGDATA/recovery.conf
standby_mode = on
trigger_file = '/opt/bohai/pgsql9.5/pg.trigger'
primary_conninfo = 'host=10.40.194.1 port=5432 user=rep password=rep'
restore_command = 'cp /backup3/pgsql/pg_arch/1941/%f %p'

方法二:
$ vi $PGDATA/recovery.conf
standby_mode = on
trigger_file = '/opt/bohai/pgsql9.5/pg.trigger'
primary_conninfo = 'host=10.40.194.1 port=5432 user=rep password=rep'
restore_command = 'cp /backup/pgsql/pg_arch/1941/%f %p'

启动数据库,查看进程
#pg_ctl start -D /opt/bohai/pgsql9.5/data/
# ps aux|grep post
postgres 110488  0.0  0.3 10924720 306044 pts/4 S    15:28   0:00 /opt/bohai/pgsql9.5/bin/postgres -D /opt/bohai/pgsql9.5/data                            
postgres 110490  0.0  0.0 10924788 2924 ?       Ss   15:28   0:00 postgres: startup process   recovering 0000000800000000000000D9
postgres 110500  0.0  0.0 10929172 1944 ?       Ss   15:28   0:00 postgres: wal receiver process   streaming 0/D9019B80       

查看日志
# more  postgresql-2018-03-14_152851.log
LOG:  database system was interrupted; last known up at 2018-03-13 17:19:01 CST
LOG:  creating missing WAL directory "pg_xlog/archive_status"
LOG:  entering standby mode
cp: cannot stat `/backup/pgsql/pg_arch/1941/00000008.history': No such file or directory
LOG:  restored log file "0000000800000000000000D5" from archive
LOG:  redo starts at 0/D5000028
LOG:  consistent recovery state reached at 0/D50000F8
LOG:  database system is ready to accept read only connections
LOG:  restored log file "0000000800000000000000D6" from archive
LOG:  restored log file "0000000800000000000000D7" from archive
LOG:  restored log file "0000000800000000000000D8" from archive
cp: cannot stat `/backup/pgsql/pg_arch/1941/0000000800000000000000D9': No such file or directory
LOG:  fetching timeline history file for timeline 8 from primary server
LOG:  started streaming WAL from primary at 0/D9000000 on timeline 8

归档中找不到的WAL段可以在pg_xlog/中看到,这使得可以使用最近未归档的段。但是,在归档中可用的段将会被优先于pg_xlog/中的文件被使用。
通常,恢复将会处理完所有可用的WAL段,从而将数据库恢复到当前时间点(或者尽可能接近给定的可 用WAL段)。因此,一个正常的恢复将会以一个"文件未找到"消息结束,错误消息的准确文 本取决于你选择的restore_command。你也可能在恢复的开始看到一个针对名称类 似于00000001.history文件的错误消息。这也是正常的并且不表示在简单恢复情 况中的问题,对此的讨论见第 24.3.5 节。---节选自PG中文文档备份恢复章节

[root@oracle-hx-194-4 pg_xlog]# ll
total 81924
-rw------- 1 postgres postgres 16777216 Mar 14 15:28 0000000800000000000000D5
-rw------- 1 postgres postgres 16777216 Mar 14 15:28 0000000800000000000000D6
-rw------- 1 postgres postgres 16777216 Mar 14 15:28 0000000800000000000000D7
-rw------- 1 postgres postgres 16777216 Mar 14 15:28 0000000800000000000000D8
-rw------- 1 postgres postgres 16777216 Mar 14 15:39 0000000800000000000000D9
-rw------- 1 postgres postgres      304 Mar 14 15:28 00000008.history
drwx------ 2 postgres postgres      150 Mar 14 15:28 archive_status


附基础备份脚本
[postgres@db-hx-194-1 ~]$ crontab -l
19 17  * * * /var/lib/pgsql/9.5/data/backup.sh >>/tmp/backup.log 2>&1
[postgres@db-hx-194-1 ~]$ cat /var/lib/pgsql/9.5/data/backup.sh
#!/bin/bash  
export LANG=en_US.utf8  
export PGHOME=/opt/pgsql-9.5
export LD_LIBRARY_PATH=$PGHOME/lib:/lib64:/usr/lib64:/usr/local/lib64:/lib:/usr/lib:/usr/local/lib:$LD_LIBRARY_PATH  
export DATE=`date +"%Y%m%d"`  
export PATH=$PGHOME/bin:$PATH:.  
export PGDATA=/var/lib/pgsql/9.5/data
 
BASEDIR="/opt/pgdata/digoal/1921/data04/pg93backup"
date +%F%T  
if [ ! -d $BASEDIR/$DATE ]; then  
  mkdir -p $BASEDIR/$DATE  
  if [ $? -eq 0 ]; then  
    psql -h 127.0.0.1 -p 5432 -U postgres postgres -c "select pg_start_backup(now()::text)"  
    if [ $? -eq 0 ]; then  
      cp -r -L $PGDATA $BASEDIR/$DATE  
    else  
      echo -e "select pg_start_backup(now()::text) error"  
      exit 1  
    fi  
    psql -h 127.0.0.1 -p 5432 -U postgres postgres -c "select pg_stop_backup()"  
    date +%F%T  
    echo -e "backup successed"  
    exit 0  
  else  
    echo -e "mkdir -p $BASEDIR/$DATE error"  
    exit 1  
  fi  
else  
  echo -e "$DATE backuped, don't backup repeated"  
  exit 1  
fi  



2020 jeepxie.net webmaster#jeepxie.net
10 q. 0.009 s.
京ICP备10005923号