Redis version: 6.2
Data directory: /data/redis
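The data directory has to exist (and be writable by the user running Redis) before the server can store its RDB/AOF files there; create it first:
# mkdir -p /data/redis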
# vim /etc/sysctl.conf
vm.overcommit_memory=1
net.core.somaxconn=1024
net.ipv4.tcp_max_syn_backlog=1024
Run sysctl -p to make the settings take effect.
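To confirm the new values are active, query them back; sysctl accepts several keys at once:
# sysctl vm.overcommit_memory net.core.somaxconn net.ipv4.tcp_max_syn_backlog
vm.overcommit_memory = 1
net.core.somaxconn = 1024
net.ipv4.tcp_max_syn_backlog = 1024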
tar xvzf redis-6.2.tar.gz -C /opt/
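The tarball contains only the source, so it still has to be compiled. A minimal build sketch, assuming the archive unpacks to /opt/redis-6.2 and make plus a C toolchain are installed (make install copies the binaries to /usr/local/bin by default):
# cd /opt/redis-6.2
# make
# make install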
# vim redis.conf
# =========================== Basic settings ======================
# bind sets the listening address; * means all interfaces
# bind 192.168.1.2
bind *
# Listening port
port 6379
# Set the Redis password to redispw
requirepass redispw
# TCP listen backlog length
tcp-backlog 1024
logfile "/var/log/redis.log"
# Protected mode
protected-mode yes
# Run as a daemon
daemonize yes
pidfile /var/run/redis.pid
databases 16
# =========================== Persistence settings ======================
# save <seconds> <number of changed keys>
# e.g. "save 3600 100" means: dump an RDB snapshot if at least 100 keys have changed within 3600 seconds
save 900 100
save 300 1000
save 60 10000
dbfilename "dump.rdb"
dir "/data/redis/"
appendonly yes
appendfilename "appendonly.aof"
# If RDB persistence takes too long, consider disabling RDB compression and checksums
rdbcompression no
rdbchecksum no
# =========================== Memory management ======================
# Once Redis memory usage reaches 30GB, further writes will return an error (with the noeviction policy below)
maxmemory 30GB
# Key eviction policy; not needed for now, only relevant when Redis is used as a cache layer
# volatile-lru -> Evict using approximated LRU, only keys with an expire set.
# allkeys-lru -> Evict any key using approximated LRU.
# volatile-lfu -> Evict using approximated LFU, only keys with an expire set.
# allkeys-lfu -> Evict any key using approximated LFU.
# volatile-random -> Remove a random key having an expire set.
# allkeys-random -> Remove a random key, any key.
# volatile-ttl -> Remove the key with the nearest expire time (minor TTL)
# noeviction -> Don't evict anything, just return an error on write operations.
maxmemory-policy noeviction
If you are deploying a standalone node, you can now start the server simply by running redis-server redis.conf.
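A quick sanity check once the server is running; redis-cli connects to 127.0.0.1:6379 by default, and the password redispw comes from the config above:
$ redis-cli -a redispw ping
PONG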
If you are deploying a Redis cluster, continue with the steps below.
Redis Cluster requires at least three master nodes, so if you also want replicas, the machine count is at least 3 + replicas * 3: with 1 replica per master that is 3 + 1 * 3 = 6 nodes, and with 0 replicas it is 3 + 0 * 3 = 3 nodes.
Add the following cluster-related options on top of the configuration file from step 5.
# Enable cluster mode
cluster-enabled yes
# Cluster config file; specify only a file name, the file itself is generated and maintained by redis-server
cluster-config-file nodes-6379.conf
# Timeout for communication between cluster nodes, in milliseconds
cluster-node-timeout 15000
# This factor determines whether a replica is eligible to be promoted to master during a failover:
# (node-timeout * cluster-replica-validity-factor) + repl-ping-replica-period
# If the replica's last interaction with its master is older than this window, it will not attempt the failover
cluster-replica-validity-factor 10
# A replica migrates to an orphaned master only if its current master is left with at least this many replicas
cluster-migration-barrier 1
# With 'yes', the whole cluster stops serving queries as soon as any hash slot is uncovered; set this to 'no'
# if, when some nodes are down and cannot be recovered for a while, you want the remaining nodes to keep serving the data they still hold
cluster-require-full-coverage yes
# When the cluster is marked as failed, or this node cannot reach a majority of masters, Redis stops serving queries to avoid returning inconsistent data
cluster-allow-reads-when-down no
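Each of the six machines is then started with its own copy of this config; a sketch assuming the file is saved at the same path on every host (the server daemonizes, so check the log to confirm it came up):
$ redis-server /opt/redis-6.2/redis.conf
$ tail -n 3 /var/log/redis.log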
$ redis-cli --cluster create 192.168.1.1:6379 192.168.1.2:6379 192.168.1.3:6379 192.168.1.4:6379 192.168.1.5:6379 192.168.1.6:6379 --cluster-replicas 1
# If requirepass is set (as above), add "-a redispw" to the command. The output looks like the following
# (this capture is from a local 6-node test on 127.0.0.1:6379-6384); type yes to create the cluster
>>> Performing hash slots allocation on 6 nodes...
Master[0] -> Slots 0 - 5460
Master[1] -> Slots 5461 - 10922
Master[2] -> Slots 10923 - 16383
Adding replica 127.0.0.1:6383 to 127.0.0.1:6379
Adding replica 127.0.0.1:6384 to 127.0.0.1:6380
Adding replica 127.0.0.1:6382 to 127.0.0.1:6381
>>> Trying to optimize slaves allocation for anti-affinity
[WARNING] Some slaves are in the same host as their master
M: 21288e053878f469afe96906b54ec9888fc40e2e 127.0.0.1:6379
slots:[0-5460] (5461 slots) master
M: f34a5a5a97de04f4b5d81e1b4d2397c7a9a88b64 127.0.0.1:6380
slots:[5461-10922] (5462 slots) master
M: 7c6d8f075b9c74ea249a957cee89f3db0b45eb74 127.0.0.1:6381
slots:[10923-16383] (5461 slots) master
S: 84252a1eb718f14737cc6fead38a25dee6a467ef 127.0.0.1:6382
replicates 21288e053878f469afe96906b54ec9888fc40e2e
S: 228937bc9dfb6d18204cefd810490b5bf590ef48 127.0.0.1:6383
replicates f34a5a5a97de04f4b5d81e1b4d2397c7a9a88b64
S: 335ce8d6a27eba0d9ba5917ff2c756c7bc40bcea 127.0.0.1:6384
replicates 7c6d8f075b9c74ea249a957cee89f3db0b45eb74
Can I set the above configuration? (type 'yes' to accept): yes
# After typing yes, the cluster is created with the topology shown above
>>> Nodes configuration updated
>>> Assign a different config epoch to each node
>>> Sending CLUSTER MEET messages to join the cluster
Waiting for the cluster to join
.
>>> Performing Cluster Check (using node 127.0.0.1:6379)
M: 21288e053878f469afe96906b54ec9888fc40e2e 127.0.0.1:6379
slots:[0-5460] (5461 slots) master
1 additional replica(s)
S: 335ce8d6a27eba0d9ba5917ff2c756c7bc40bcea 127.0.0.1:6384
slots: (0 slots) slave
replicates 21288e053878f469afe96906b54ec9888fc40e2e
S: 84252a1eb718f14737cc6fead38a25dee6a467ef 127.0.0.1:6382
slots: (0 slots) slave
replicates f34a5a5a97de04f4b5d81e1b4d2397c7a9a88b64
S: 228937bc9dfb6d18204cefd810490b5bf590ef48 127.0.0.1:6383
slots: (0 slots) slave
replicates 7c6d8f075b9c74ea249a957cee89f3db0b45eb74
M: f34a5a5a97de04f4b5d81e1b4d2397c7a9a88b64 127.0.0.1:6380
slots:[5461-10922] (5462 slots) master
1 additional replica(s)
M: 7c6d8f075b9c74ea249a957cee89f3db0b45eb74 127.0.0.1:6381
slots:[10923-16383] (5461 slots) master
1 additional replica(s)
[OK] All nodes agree about slots configuration.
>>> Check for open slots...
>>> Check slots coverage...
[OK] All 16384 slots covered.
# You must use redis-cli -c (cluster mode); otherwise redis-cli will not follow MOVED redirections and simply returns the MOVED error
$ redis-cli -c
127.0.0.1:6379> set a 1
-> Redirected to slot [15495] located at 127.0.0.1:6381
OK
127.0.0.1:6381> set b 2
-> Redirected to slot [3300] located at 127.0.0.1:6379
OK
127.0.0.1:6379> get a
-> Redirected to slot [15495] located at 127.0.0.1:6381
"1"
127.0.0.1:6381> get b
-> Redirected to slot [3300] located at 127.0.0.1:6379
"2"
# cluster nodes shows the state of every node in the cluster
127.0.0.1:6379> cluster nodes
335ce8d6a27eba0d9ba5917ff2c756c7bc40bcea 127.0.0.1:6384@16384 slave 21288e053878f469afe96906b54ec9888fc40e2e 0 1656469917000 1 connected
84252a1eb718f14737cc6fead38a25dee6a467ef 127.0.0.1:6382@16382 slave f34a5a5a97de04f4b5d81e1b4d2397c7a9a88b64 0 1656469916778 2 connected
228937bc9dfb6d18204cefd810490b5bf590ef48 127.0.0.1:6383@16383 slave 7c6d8f075b9c74ea249a957cee89f3db0b45eb74 0 1656469916000 3 connected
21288e053878f469afe96906b54ec9888fc40e2e 127.0.0.1:6379@16379 myself,master - 0 1656469916000 1 connected 0-5460
f34a5a5a97de04f4b5d81e1b4d2397c7a9a88b64 127.0.0.1:6380@16380 master - 0 1656469918981 2 connected 5461-10922
7c6d8f075b9c74ea249a957cee89f3db0b45eb74 127.0.0.1:6381@16381 master - 0 1656469917845 3 connected 10923-16383
# cluster info shows the overall health of the cluster
127.0.0.1:6379> cluster info
cluster_state:ok
cluster_slots_assigned:16384
cluster_slots_ok:16384
cluster_slots_pfail:0
cluster_slots_fail:0
cluster_known_nodes:6
cluster_size:3
cluster_current_epoch:6
cluster_my_epoch:1
cluster_stats_messages_ping_sent:450
cluster_stats_messages_pong_sent:447
cluster_stats_messages_sent:897
cluster_stats_messages_ping_received:442
cluster_stats_messages_pong_received:450
cluster_stats_messages_meet_received:5
cluster_stats_messages_received:897
Note that Redis Cluster distributes data across nodes with the key as the smallest unit: the value of a single hash/list key lives entirely on one node, and only different keys are spread across the cluster's nodes (see the example below).
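You can check which slot a key maps to with CLUSTER KEYSLOT; keys that share a hash tag (the part inside {}) always map to the same slot, and therefore to the same node. The slot number for the hash-tag keys below is illustrative:
127.0.0.1:6379> CLUSTER KEYSLOT a
(integer) 15495
127.0.0.1:6379> CLUSTER KEYSLOT {user1000}.following
(integer) 1649
127.0.0.1:6379> CLUSTER KEYSLOT {user1000}.followers
(integer) 1649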
Check the cluster state from the command line:
$ redis-cli --cluster check localhost:6379
localhost:6379 (21288e05...) -> 1 keys | 5461 slots | 1 slaves.
127.0.0.1:6380 (f34a5a5a...) -> 0 keys | 5462 slots | 1 slaves.
127.0.0.1:6381 (7c6d8f07...) -> 1 keys | 5461 slots | 1 slaves.
[OK] 2 keys in 3 masters.
0.00 keys per slot on average.
>>> Performing Cluster Check (using node localhost:6379)
M: 21288e053878f469afe96906b54ec9888fc40e2e localhost:6379
slots:[0-5460] (5461 slots) master
1 additional replica(s)
S: 335ce8d6a27eba0d9ba5917ff2c756c7bc40bcea 127.0.0.1:6384
slots: (0 slots) slave
replicates 21288e053878f469afe96906b54ec9888fc40e2e
S: 84252a1eb718f14737cc6fead38a25dee6a467ef 127.0.0.1:6382
slots: (0 slots) slave
replicates f34a5a5a97de04f4b5d81e1b4d2397c7a9a88b64
S: 228937bc9dfb6d18204cefd810490b5bf590ef48 127.0.0.1:6383
slots: (0 slots) slave
replicates 7c6d8f075b9c74ea249a957cee89f3db0b45eb74
M: f34a5a5a97de04f4b5d81e1b4d2397c7a9a88b64 127.0.0.1:6380
slots:[5461-10922] (5462 slots) master
1 additional replica(s)
M: 7c6d8f075b9c74ea249a957cee89f3db0b45eb74 127.0.0.1:6381
slots:[10923-16383] (5461 slots) master
1 additional replica(s)
[OK] All nodes agree about slots configuration.
>>> Check for open slots...
>>> Check slots coverage...
[OK] All 16384 slots covered.
This completes the Redis cluster deployment; what follows covers adding nodes and manual failover.
First, find the existing master nodes:
$ redis-cli --cluster info 127.0.0.1:6379
127.0.0.1:6379 (3e7294b4...) -> 0 keys | 5461 slots | 1 slaves.
127.0.0.1:6381 (05390615...) -> 0 keys | 5461 slots | 1 slaves.
127.0.0.1:6380 (f1fb842d...) -> 0 keys | 5462 slots | 1 slaves.
[OK] 0 keys in 3 masters.
0.00 keys per slot on average.
Then deploy a standalone Redis instance as in step 1, and add it to the cluster with the command below:
# redis-cli --cluster add-node <new redis address> <address of an existing master, found with the command above>
$ redis-cli --cluster add-node 127.0.0.1:6382 127.0.0.1:6379
>>> Adding node 127.0.0.1:6382 to cluster 127.0.0.1:6379
>>> Performing Cluster Check (using node 127.0.0.1:6379)
M: 09f1dd1dd20bccb9041f749d4f99bc0e36d2b8f7 127.0.0.1:6379
slots:[0-5460] (5461 slots) master
M: 9b50546645fbfcf8776e5bf4fc7d9f712dc38ab5 127.0.0.1:6380
slots:[5461-10922] (5462 slots) master
M: 7f4fae24ea937cc3c2573b2606bba9ef73fa0c83 127.0.0.1:6381
slots:[10923-16383] (5461 slots) master
[OK] All nodes agree about slots configuration.
>>> Check for open slots...
>>> Check slots coverage...
[OK] All 16384 slots covered.
>>> Send CLUSTER MEET to node 127.0.0.1:6382 to make it join the cluster.
[OK] New node added correctly.
The new node cannot hold data yet, because no hash slots have been assigned to the new master; the slots (and the data in them) need to be rebalanced.
You can inspect the slot distribution with:
$ redis-cli --cluster info 127.0.0.1:6379
127.0.0.1:6379 (09f1dd1d...) -> 0 keys | 5461 slots | 0 slaves.
127.0.0.1:6382 (31a5fcfe...) -> 0 keys | 0 slots | 0 slaves.
127.0.0.1:6380 (9b505466...) -> 0 keys | 5462 slots | 0 slaves.
127.0.0.1:6381 (7f4fae24...) -> 0 keys | 5461 slots | 0 slaves.
[OK] 0 keys in 4 masters.
0.00 keys per slot on average.
As shown, the new node holds no slots yet.
Rebalance the cluster with the following command:
# redis-cli --cluster rebalance <address of any master node> --cluster-use-empty-masters
# (--cluster-use-empty-masters is needed so that slots are also moved onto the new master, which currently holds none)
$ redis-cli --cluster rebalance 127.0.0.1:6379 --cluster-use-empty-masters
Moving 3258 slots from 127.0.0.1:6382 to 127.0.0.1:6379
################.....
Moving 2110 slots from 127.0.0.1:6382 to 127.0.0.1:6381
###############.....
Moving 1147 slots from 127.0.0.1:6380 to 127.0.0.1:6381
################....
Check the slot distribution again; the slots are now spread evenly:
$ redis-cli --cluster info 127.0.0.1:6379
127.0.0.1:6379 (09f1dd1d...) -> 0 keys | 4096 slots | 0 slaves.
127.0.0.1:6382 (31a5fcfe...) -> 0 keys | 4096 slots | 0 slaves.
127.0.0.1:6380 (9b505466...) -> 0 keys | 4096 slots | 0 slaves.
127.0.0.1:6381 (7f4fae24...) -> 0 keys | 4096 slots | 0 slaves.
# redis-cli --cluster add-node <new redis node> <existing redis master node> --cluster-slave
$ redis-cli --cluster add-node 127.0.0.1:6383 127.0.0.1:6379 --cluster-slave
This way, Redis automatically attaches the new node as a replica of the master that currently has the fewest replicas.
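If you would rather attach the replica to a specific master, pass that master's node ID explicitly (here the ID of 127.0.0.1:6379 shown earlier):
# redis-cli --cluster add-node <new redis node> <existing node> --cluster-slave --cluster-master-id <master node id>
$ redis-cli --cluster add-node 127.0.0.1:6383 127.0.0.1:6379 --cluster-slave --cluster-master-id 09f1dd1dd20bccb9041f749d4f99bc0e36d2b8f7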
To remove a node, first use redis-cli --cluster check <master address> to look up the node IDs:
$ redis-cli --cluster check 127.0.0.1:6379
127.0.0.1:6379 (09f1dd1d...) -> 0 keys | 4096 slots | 1 slaves.
127.0.0.1:6382 (31a5fcfe...) -> 0 keys | 4096 slots | 0 slaves.
127.0.0.1:6380 (9b505466...) -> 0 keys | 4096 slots | 0 slaves.
127.0.0.1:6381 (7f4fae24...) -> 0 keys | 4096 slots | 0 slaves.
[OK] 0 keys in 4 masters.
0.00 keys per slot on average.
>>> Performing Cluster Check (using node 127.0.0.1:6379)
M: 09f1dd1dd20bccb9041f749d4f99bc0e36d2b8f7 127.0.0.1:6379
slots:[5154-7883],[11141-12506] (4096 slots) master
1 additional replica(s)
M: 31a5fcfe55686aedffac99e0fb5bc23080ac5f12 127.0.0.1:6382
slots:[0-2511],[3480-4314],[4623-5153],[7884-8101] (4096 slots) master
S: cf97c2c9a1db18a245253ac4dfc3ffd7e03c8858 127.0.0.1:6383
slots: (0 slots) slave
replicates 09f1dd1dd20bccb9041f749d4f99bc0e36d2b8f7
M: 9b50546645fbfcf8776e5bf4fc7d9f712dc38ab5 127.0.0.1:6380
slots:[2512-3479],[4315-4622],[9776-10922],[12507-13871],[15237-15544] (4096 slots) master
M: 7f4fae24ea937cc3c2573b2606bba9ef73fa0c83 127.0.0.1:6381
slots:[8102-9775],[10923-11140],[13872-15236],[15545-16383] (4096 slots) master
[OK] All nodes agree about slots configuration.
>>> Check for open slots...
>>> Check slots coverage...
[OK] All 16384 slots covered.
A replica node can be deleted directly:
# redis-cli --cluster del-node <master address> <replica node id>
redis-cli --cluster del-node 127.0.0.1:6379 cf97c2c9a1db18a245253ac4dfc3ffd7e03c8858
For a master node, you must first move its slots to the other masters, and only then delete it:
# This command sets the weight of the master to be removed to 0, so its slots are automatically rebalanced onto the other nodes
# redis-cli --cluster rebalance <master address> --cluster-weight <node id of the master to remove>=0
redis-cli --cluster rebalance 127.0.0.1:6379 --cluster-weight 31a5fcfe55686aedffac99e0fb5bc23080ac5f12=0
redis-cli --cluster del-node 127.0.0.1:6379 31a5fcfe55686aedffac99e0fb5bc23080ac5f12
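After removing the master, it is worth re-running the cluster check to confirm its slots ended up on the remaining masters and that all 16384 slots are still covered:
$ redis-cli --cluster check 127.0.0.1:6379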
By default, when a Redis master node goes down, one of its replica (slave) nodes is automatically promoted to master.
Sometimes we want to fail over manually: promote a slave to master ahead of time, and only then shut down the old master, so the service is never interrupted.
$ redis-cli -c -h <master ip> -p <master port>
127.0.0.1:6379> cluster nodes
f23ff247157b00f6a549ae55c819f5e9c64ac1a6 127.0.0.1:6381@16381 master - 0 1656481018000 3 connected 10923-16383
c1a829314ea665df29a389884006d6b25dd36851 127.0.0.1:6382@16382 slave f7f3af9dd3bd0bdf3439a3e73b70f7bd43e242f8 0 1656481018000 2 connected
d7746a60aff205df35abfac3bcbb6906ea6eab5d 127.0.0.1:6384@16384 slave 5ec24695947e5d9bb6e9ec1faa5645fdcff5cf42 0 1656481018982 8 connected
f7f3af9dd3bd0bdf3439a3e73b70f7bd43e242f8 127.0.0.1:6380@16380 master - 0 1656481020011 2 connected 5461-10922
5ec24695947e5d9bb6e9ec1faa5645fdcff5cf42 127.0.0.1:6379@16379 myself,master - 0 1656481019000 8 connected 0-5460
3206e286f401971a0333fcfddaea20fea51392dd 127.0.0.1:6383@16383 slave f23ff247157b00f6a549ae55c819f5e9c64ac1a6 0 1656481019000 3 connected
# cluster nodes lists every node in the cluster. Suppose we want to fail over the first master, f23ff247157b00f6a549ae55c819f5e9c64ac1a6 127.0.0.1:6381@16381
# We can find that master's replicas with the following command:
127.0.0.1:6379> CLUSTER REPLICAS f23ff247157b00f6a549ae55c819f5e9c64ac1a6
1) "3206e286f401971a0333fcfddaea20fea51392dd 127.0.0.1:6383@16383 slave f23ff247157b00f6a549ae55c819f5e9c64ac1a6 0 1656481152673 3 connected"
# We now have the location of the first master's replica: 3206e286f401971a0333fcfddaea20fea51392dd 127.0.0.1:6383@16383
# Connect to it with redis-cli:
$ redis-cli -c -h 127.0.0.1 -p 6383
127.0.0.1:6383> CLUSTER FAILOVER
OK
127.0.0.1:6383> cluster nodes
f7f3af9dd3bd0bdf3439a3e73b70f7bd43e242f8 127.0.0.1:6380@16380 master - 0 1656481277000 2 connected 5461-10922
f23ff247157b00f6a549ae55c819f5e9c64ac1a6 127.0.0.1:6381@16381 slave 3206e286f401971a0333fcfddaea20fea51392dd 0 1656481278837 9 connected
c1a829314ea665df29a389884006d6b25dd36851 127.0.0.1:6382@16382 slave f7f3af9dd3bd0bdf3439a3e73b70f7bd43e242f8 0 1656481277000 2 connected
d7746a60aff205df35abfac3bcbb6906ea6eab5d 127.0.0.1:6384@16384 slave 5ec24695947e5d9bb6e9ec1faa5645fdcff5cf42 0 1656481277000 8 connected
3206e286f401971a0333fcfddaea20fea51392dd 127.0.0.1:6383@16383 myself,master - 0 1656481275000 9 connected 10923-16383
5ec24695947e5d9bb6e9ec1faa5645fdcff5cf42 127.0.0.1:6379@16379 master - 0 1656481277824 8 connected 0-5460
# After a short wait, run cluster nodes again: f23ff247157b00f6a549ae55c819f5e9c64ac1a6 127.0.0.1:6381@16381 has become a slave node
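CLUSTER FAILOVER as used above performs a coordinated hand-off with the old master, so no writes are lost. If the master is already unreachable, the command also accepts the FORCE and TAKEOVER options (skipping coordination with the master, and additionally skipping cluster agreement, respectively); both are riskier and only meant for cases where a normal failover cannot proceed. Run on the replica, for example:
127.0.0.1:6383> CLUSTER FAILOVER FORCE
OK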