
Backing up with clickhouse-backup: AWS S3 configuration


List the tables that can be backed up

[root@VM-12-9-centos clickhouse-backup]# ./clickhouse-backup --config config.yml tables;
default.demo1  422B      default  
default.t      0B        default  
default.t2     43.91MiB  default  
default.t3     43.91MiB  default

Create a backup

Back up all tables
[root@test clickhouse-backup]# ./clickhouse-backup --config config.yml create {backup_name}

Back up a single table
[root@VM-12-9-centos clickhouse-backup]# ./clickhouse-backup --config config.yml create --table {database.table} {backup_name}
2023/05/17 15:10:27  info done                      backup=bak20230517 operation=create table=default.t2
2023/05/17 15:10:27  info done                      backup=bak20230517 operation=create

Upload a backup

[root@test clickhouse-backup]# ./clickhouse-backup --config config.yml upload {backup_name}
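
clickhouse-backup also has a create_remote command that creates a backup and uploads it in one step; a minimal sketch using the same {backup_name} placeholder (not part of the original walkthrough):
[root@test clickhouse-backup]# ./clickhouse-backup --config config.yml create_remote {backup_name}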

List backups

[root@test clickhouse-backup]# ./clickhouse-backup --config config.yml list
{backup_name}   43.92MiB   17/05/2023 03:22:43   local       
{backup_name}   44.76MiB   17/05/2023 11:50:12   remote      tar

# local     a backup stored on the local server
# remote    a backup that has been uploaded to remote storage
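
To restore from a remote backup it first has to be brought back to local storage; a minimal sketch using the download command (the counterpart of upload, with the same {backup_name} placeholder):
[root@test clickhouse-backup]# ./clickhouse-backup --config config.yml download {backup_name}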

Restore a backup

Restore the table schema only
[root@test clickhouse-backup]# ./clickhouse-backup --config config.yml restore --table {database.table} --schema {backup_name}

Restore the table data only
[root@test clickhouse-backup]# ./clickhouse-backup --config config.yml restore --table {database.table} --data {backup_name}
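
To restore both the schema and the data, run restore without the --schema or --data flag (a sketch built from the two commands above):
[root@test clickhouse-backup]# ./clickhouse-backup --config config.yml restore --table {database.table} {backup_name}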

Delete a backup

Delete a local backup
[root@test clickhouse-backup]# ./clickhouse-backup --config config.yml delete local {backup_name}

Delete a remote backup
[root@test clickhouse-backup]# ./clickhouse-backup --config config.yml delete remote {backup_name}

Back up specific partitions of a table

List the table's partitions in ClickHouse
VM-12-9-centos :) select partition from system.parts where table = '{table_name}' group by partition order by partition asc;

Back up the specified partitions
[root@test clickhouse-backup]# ./clickhouse-backup --config config.yml create --table {database.table} --partitions {partition1},{partition2},{partition3} {backup_name}
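
For example, assuming a table partitioned by month whose partition values look like 202304 and 202305 (these partition values are illustrative, not taken from the output above), the command could be:
[root@test clickhouse-backup]# ./clickhouse-backup --config config.yml create --table default.t2 --partitions 202304,202305 bak20230517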

Restore specific partitions of a table

Check how many parts the ClickHouse table has
VM-12-9-centos :) select count(*) from system.parts where table = '{table_name}'\G
Row 1:
──────
count(): 97

List the table's partitions
VM-12-9-centos :) select partition from system.parts where table = '{table_name}' group by partition order by partition asc;

Drop a specified partition
VM-12-9-centos :) alter table {table_name} drop partition {partition};
...... (repeated for other partitions, output omitted)

Check the part count again, after dropping partitions
VM-12-9-centos :) select count(*) from system.parts where table = '{table_name}'\G
Row 1:
──────
count(): 81

Restore the partition data for the table
[root@test clickhouse-backup]# ./clickhouse-backup --config config.yml restore --table {database.table} --partitions {partition1},{partition2},{partition3},... --data {backup_name}

Check the part count again, after the restore
VM-12-9-centos :) select count(*) from system.parts where table = '{table_name}'\G
Row 1:
──────
count(): 97

Note:

# With --data      only the backed-up partition data is attached into the table's data directory; the existing data is left untouched
# Without --data   the restore rebuilds the table from the backup, so the table's existing data is overwritten and cleared
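
A side-by-side sketch of the two forms, using the same placeholders as above:
# attaches only the backed-up partitions; existing parts are kept
[root@test clickhouse-backup]# ./clickhouse-backup --config config.yml restore --table {database.table} --partitions {partition1} --data {backup_name}
# recreates the table from the backup; existing data in it is lost
[root@test clickhouse-backup]# ./clickhouse-backup --config config.yml restore --table {database.table} --partitions {partition1} {backup_name}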

AWS S3 configuration

s3:
  access_key: "{access_key}"       # S3_ACCESS_KEY, required
  secret_key: "{secret_key}"       # S3_SECRET_KEY, required
  bucket: "test1"                  # S3_BUCKET
  endpoint: ""                     # S3_ENDPOINT, can be left empty for AWS S3
  region: "{region}"               # S3_REGION
  acl: private                     # S3_ACL
  assume_role_arn: ""              # S3_ASSUME_ROLE_ARN
  force_path_style: false          # S3_FORCE_PATH_STYLE
  path: "backup/"                         # S3_PATH
  disable_ssl: false               # S3_DISABLE_SSL
  compression_level: 1             # S3_COMPRESSION_LEVEL
  compression_format: tar          # S3_COMPRESSION_FORMAT
  sse: "aws:kms"                          # S3_SSE, empty (default), AES256, or aws:kms
  disable_cert_verification: false # S3_DISABLE_CERT_VERIFICATION
  use_custom_storage_class: false  # S3_USE_CUSTOM_STORAGE_CLASS
  storage_class: STANDARD          # S3_STORAGE_CLASS
  concurrency: 1                   # S3_CONCURRENCY
  part_size: 0                     # S3_PART_SIZE, if less or eq 0 then calculated as max_file_size / max_parts_count, between 5MB and 5Gb
  max_parts_count: 10000           # S3_MAX_PARTS_COUNT, number of parts for S3 multipart uploads
  allow_multipart_download: false  # S3_ALLOW_MULTIPART_DOWNLOAD, allow us fast download speed (same as upload), but will require additional disk space, download_concurrency * part size in worst case   
  debug: true                     # S3_DEBUG
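
Every option above can also be supplied through the environment variable named in its comment instead of config.yml; a minimal sketch (the values are placeholders):
[root@test clickhouse-backup]# export S3_ACCESS_KEY="{access_key}"
[root@test clickhouse-backup]# export S3_SECRET_KEY="{secret_key}"
[root@test clickhouse-backup]# export S3_BUCKET="test1"
[root@test clickhouse-backup]# export S3_REGION="{region}"
[root@test clickhouse-backup]# ./clickhouse-backup tables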

Restore distributed data

Edit config.yml
[root@test clickhouse-backup]# vim config.yml

In the clickhouse section, change the parameter below to false (shown here at its default value of true; see the snippet after it):
check_replicas_before_attach: true
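
A minimal sketch of the relevant piece of config.yml after the change (the other keys of the clickhouse section are omitted):
clickhouse:
  check_replicas_before_attach: false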

If check_replicas_before_attach is not set to false (i.e. it stays at true), the restore prints the warning below and the shard's data is not restored:
2023/05/24 10:46:47.985340  warn ck_local.t4 skipped cause system.replicas entry already exists and replication in progress from another replica logger=clickhouse

The data only needs to be restored on the first node of the target shard.
[root@test clickhouse-backup]# ./clickhouse-backup --config config.yml restore_remote bak_01_t4_05_24_10_33 -d -t ck_local.t4
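
After the restore finishes, a quick sanity check is to count the rows on each replica and confirm they match (a sketch using the table from the command above):
VM-12-9-centos :) select count(*) from ck_local.t4;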

Do not run the restore command more than once: repeating it will duplicate the data.
Merge the table's parts
VM-12-9-centos :) optimize table {table_name} final;
