High Availability Cluster Framework
Topology diagram reproduced from http://www.178linux.com/16656
Lab topology (two cluster nodes, one NFS host, one Ansible host):
node1 192.168.150.137 node1.com
node2 192.168.150.138 node2.com
nfs 192.168.150.139
ansible 192.168.150.140
1. Pre-cluster preparation
Both nodes need time synchronization, mutual SSH trust, and hostnames that match their name resolution.
Since there are only two nodes, this is a good chance to drive the setup with Ansible.
Edit the hosts files:
~]# hostnamectl set-hostname node1.com
~]# uname -n
node1.com
~]# vim /etc/hosts
192.168.150.137 node1 node1.com
192.168.150.138 node2 node2.com
~]# hostnamectl set-hostname node2.com
~]# uname -n
node2.com
~]# vim /etc/hosts
192.168.150.137 node1 node1.com
192.168.150.138 node2 node2.com
Install and configure Ansible on the Ansible host
yum -y install ansible        (requires the EPEL repo to be configured)
Edit the inventory:
~]# cd /etc/ansible/
ansible]# cp hosts{,.bak}
ansible]# vim hosts
[haservers]
192.168.150.137
192.168.150.138
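Optionally, the inventory can use host aliases so ad-hoc commands can address the nodes by name (a sketch only; ansible_host is a standard Ansible 2.x inventory variable, and the node1/node2 aliases are just a convenience, not used in the transcripts below):
[haservers]
node1 ansible_host=192.168.150.137
node2 ansible_host=192.168.150.138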
Set up SSH public key authentication
[root@localhost ~]# ssh-keygen -t rsa -P ''
Generating public/private rsa key pair.
Enter file in which to save the key (/root/.ssh/id_rsa):
Created directory '/root/.ssh'.
Your identification has been saved in /root/.ssh/id_rsa.
Your public key has been saved in /root/.ssh/id_rsa.pub.
The key fingerprint is:
db:54:47:3f:ab:04:0e:55:be:fc:1f:cb:ef:59:d1:e9 root@localhost.localdomain
The key's randomart image is:
+--[ RSA 2048]----+
| .... |
| . .. . |
| . ......|
| o.o.. =|
| S .. + +.|
| + . + .|
| . . . E.|
| . *|
| ==|
+-----------------+
[root@localhost ~]# ssh-copy-id -i .ssh/id_rsa.pub root@192.168.150.137
The authenticity of host '192.168.150.137 (192.168.150.137)' can't be established.
ECDSA key fingerprint is 1f:41:1e:c2:4f:20:9b:24:65:dc:9e:50:28:46:be:36.
Are you sure you want to continue connecting (yes/no)? yes
/usr/bin/ssh-copy-id: INFO: attempting to log in with the new key(s), to filter out any that are already installed
/usr/bin/ssh-copy-id: INFO: 1 key(s) remain to be installed -- if you are prompted now it is to install the new keys
root@192.168.150.137's password:
Number of key(s) added: 1
Now try logging into the machine, with: "ssh 'root@192.168.150.137'"
and check to make sure that only the key(s) you wanted were added.
[root@localhost ~]# ssh-copy-id -i .ssh/id_rsa.pub root@192.168.150.138
The authenticity of host '192.168.150.138 (192.168.150.138)' can't be established.
ECDSA key fingerprint is 1f:41:1e:c2:4f:20:9b:24:65:dc:9e:50:28:46:be:36.
Are you sure you want to continue connecting (yes/no)? yes
/usr/bin/ssh-copy-id: INFO: attempting to log in with the new key(s), to filter out any that are already installed
/usr/bin/ssh-copy-id: INFO: 1 key(s) remain to be installed -- if you are prompted now it is to install the new keys
root@192.168.150.138's password:
Permission denied, please try again.
root@192.168.150.138's password:
Number of key(s) added: 1
Now try logging into the machine, with: "ssh 'root@192.168.150.138'"
and check to make sure that only the key(s) you wanted were added.
[root@localhost ~]# ssh 192.168.150.137
Last login: Tue Jan 17 18:50:53 2017 from 192.168.150.1
[root@node1 ~]# exit
logout
Connection to 192.168.150.137 closed.
[root@localhost ~]# ssh 192.168.150.138
Last failed login: Tue Jan 17 19:26:55 CST 2017 from 192.168.150.140 on ssh:notty
There was 1 failed login attempt since the last successful login.
Last login: Tue Jan 17 18:51:06 2017 from 192.168.150.1
[root@node2 ~]# exit
logout
Connection to 192.168.150.138 closed.
Test
~]# ansible all -m ping
192.168.150.137 | SUCCESS => {
"changed": false,
"ping": "pong"
}
192.168.150.138 | SUCCESS => {
"changed": false,
"ping": "pong"
}
Install ntpdate
~]# ansible all -m yum -a "name=ntpdate state=present"
Set up a cron job to keep the clocks in sync
~]# ansible all -m cron -a "minute=*/5 job='/sbin/ntpdate 1.cn.pool.ntp.org &>/dev/null' name=Synctime"
2. Install corosync, pacemaker and crmsh
First install corosync and pacemaker.
Since corosync is a dependency of pacemaker, it is pulled in automatically when pacemaker is installed.
Install with Ansible:
~]# ansible all -m yum -a "name=pacemaker state=present"
Confirm on the nodes
~]# rpm -qa pacemaker
pacemaker-1.1.15-11.el7_3.2.x86_64
~]# rpm -qa corosync
corosync-2.4.0-4.el7.x86_64
Install crmsh
crmsh is not available in the base yum repos or EPEL, so download it from:
http://download.opensuse.org/repositories/network:/ha-clustering:/Stable/CentOS_CentOS-7/noarch/
Download the packages to the Ansible host, copy them to the node hosts, and install:
~]# ls crmsh/
asciidoc-8.6.9-32.1.noarch.rpm crmsh-scripts-2.3.2-1.1.noarch.rpm
asciidoc-examples-8.6.9-32.1.noarch.rpm crmsh-test-2.3.2-1.1.noarch.rpm
crmsh-2.3.2-1.1.noarch.rpm python-parallax-1.0.1-28.1.noarch.rpm
~]# ansible all -m shell -a 'mkdir /root/crmsh'
192.168.150.137 | SUCCESS | rc=0 >>
192.168.150.138 | SUCCESS | rc=0 >>
[root@localhost ~]# ansible all -m copy -a "src=/root/crmsh/ dest=/root/crmsh/"
192.168.150.137 | SUCCESS => {
"changed": true,
"dest": "/root/crmsh/",
"src": "/root/crmsh"
}
192.168.150.138 | SUCCESS => {
"changed": true,
"dest": "/root/crmsh/",
"src": "/root/crmsh"
}
~]# ansible all -m shell -a 'yum -y install /root/crmsh/*.rpm'
Confirm on a node
~]# crm
crm(live)#
3. Configure corosync and pacemaker and start the services
Edit the corosync configuration on the Ansible host, then deploy it to the nodes:
~]# yum -y install pacemaker
~]# cd /etc/corosync/
corosync]# ls
corosync.conf.example corosync.conf.example.udpu corosync.xml.example uidgid.d
corosync]# cp corosync.conf.example corosync.conf
corosync]# vim corosync.conf
corosync]# grep -v "^[[:space:]]*#" corosync.conf | grep -v "^$"
totem {
    version: 2
    cluster_name: mycluster
    crypto_cipher: aes128        encrypt intra-cluster communication
    crypto_hash: sha1
    interface {
        ringnumber: 0
        bindnetaddr: 192.168.150.0
        mcastaddr: 239.255.1.1   multicast address
        mcastport: 5405
        ttl: 1                   limit multicast hops to prevent loops
    }
}
logging {
    fileline: off
    to_stderr: no
    to_logfile: yes
    logfile: /var/log/cluster/corosync.log
    to_syslog: no
    debug: off
    timestamp: on
    logger_subsys {
        subsys: QUORUM
        debug: off
    }
}
quorum {
    provider: corosync_votequorum    use corosync's built-in vote quorum
}
nodelist {                           define the cluster nodes
    node {
        ring0_addr: 192.168.150.137
        nodeid: 1
    }
    node {
        ring0_addr: 192.168.150.138
        nodeid: 2
    }
}
Create the authentication key
[root@localhost corosync]# corosync-keygen -l
Corosync Cluster Engine Authentication key generator.
Gathering 1024 bits for key from /dev/urandom.
Writing corosync key to /etc/corosync/authkey.
[root@localhost corosync]# ls -lh
total 20K
-r-------- 1 root root  128 Jan 17 20:27 authkey
-rw-r--r-- 1 root root 3.0K Jan 17 20:22 corosync.conf
-rw-r--r-- 1 root root 2.9K Nov  7 18:09 corosync.conf.example
-rw-r--r-- 1 root root  767 Nov  7 18:09 corosync.conf.example.udpu
-rw-r--r-- 1 root root 3.3K Nov  7 18:09 corosync.xml.example
drwxr-xr-x 2 root root    6 Nov  7 18:09 uidgid.d
Use Ansible to copy both the configuration file and the authkey to the node servers; note the authkey permissions.
corosync]# ansible all -m copy -a "src=/etc/corosync/authkey mode=400 dest=/etc/corosync/authkey"
corosync]# ansible all -m copy -a "src=/etc/corosync/corosync.conf dest=/etc/corosync/corosync.conf"
Verify on the node hosts
~]# ls -l /etc/corosync/
total 20
-r-------- 1 root root  128 Jan 17 14:45 authkey
-rw-r--r-- 1 root root 3027 Jan 17 14:45 corosync.conf
-rw-r--r-- 1 root root 2881 Nov  7 18:09 corosync.conf.example
-rw-r--r-- 1 root root  767 Nov  7 18:09 corosync.conf.example.udpu
-rw-r--r-- 1 root root 3278 Nov  7 18:09 corosync.xml.example
drwxr-xr-x 2 root root    6 Nov  7 18:09 uidgid.d
Start the corosync and pacemaker services
corosync]# ansible all -m service -a "name=corosync state=started"
corosync]# ansible all -m service -a "name=pacemaker state=started"
Check the cluster status on a node
~]# crm status
Stack: corosync
Current DC: node1.com (version 1.1.15-11.el7_3.2-e174ec8) - partition with quorum
Last updated: Tue Jan 17 14:51:41 2017          Last change: Tue Jan 17 14:51:11 2017 by hacluster via crmd on node1.com
2 nodes and 0 resources configured
Online: [ node1.com node2.com ]        both nodes are online
No resources
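Besides crm status, corosync membership can be checked directly on a node as a quick sanity check (a sketch using the standard corosync 2.x tools; the exact output varies):
~]# corosync-cfgtool -s                    shows ring status and whether the local ring has faults
~]# corosync-cmapctl | grep members        lists member node IDs and their addresses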
4. Configure the cluster and cluster resources with crmsh
crmsh:
Getting help: ls, help
help COMMAND
COMMAND --help
Check the cluster status:
status [<option> ...]
option :: bynode | inactive | ops | timing | failcounts
Set up and manage the cluster:
cluster
Configure the CIB:
configure/ CIB configuration
acl_target Define target access rights
_test Help for command _test
clone Define a clone
colocation Colocate resources
commit Commit the changes to the CIB
default-timeouts Set timeouts for operations to minimums from the meta-data
delete Delete CIB objects
edit Edit CIB objects
erase Erase the CIB
fencing_topology Node fencing order
filter Filter CIB objects
graph Generate a directed graph
group Define a group
load Import the CIB from a file
location A location preference
modgroup Modify group
monitor Add monitor operation to a primitive
ms Define a master-slave resource
node Define a cluster node
op_defaults Set resource operations defaults
order Order resources
primitive Define a resource
property Set a cluster property
ptest Show cluster actions if changes were committed
refresh Refresh from CIB
_regtest Help for command _regtest
rename Rename a CIB object
role Define role access rights
rsc_defaults Set resource defaults
rsc_template Define a resource template
rsc_ticket Resources ticket dependency
rsctest Test resources as currently configured
save Save the CIB to a file
schema Set or display current CIB RNG schema
show Display CIB objects
_objects Help for command _objects
tag Define resource tags
upgrade Upgrade the CIB to version 1.0
user Define user access rights
verify Verify the CIB with crm_verify
xml Raw xml
cib CIB shadow management
cibstatus CIB status management and editing
template Edit and import a configuration from a template
Manage RAs:
ra/ Resource Agents (RA) lists and documentation
classes List classes and providers
info Show meta data for a RA
list List RA for a class (and provider)
providers Show providers for a RA and a class
Node management:
node/ Nodes management
attribute Manage attributes
clearstate Clear node state
delete Delete node
fence Fence node
maintenance Put node into maintenance mode
online Set node online
ready Put node into ready mode
show Show node
standby Put node into standby
status Show nodes' status as XML
status-attr Manage status attributes
utilization Manage utilization attributes
Resource management:
resource/ Resource management
cleanup Cleanup resource status
demote Demote a master-slave resource
failcount Manage failcounts
maintenance Enable/disable per-resource maintenance mode
manage Put a resource into managed mode
meta Manage a meta attribute
migrate Migrate a resource to another node
param Manage a parameter of a resource
promote Promote a master-slave resource
refresh Refresh CIB from the LRM status
reprobe Probe for resources not started by the CRM
restart Restart a resource
scores Display resource scores
secret Manage sensitive parameters
start Start a resource
status Show status of resources
stop Stop a resource
trace Start RA tracing
unmanage Put a resource into unmanaged mode
unmigrate Unmigrate a resource to another node
untrace Stop RA tracing
utilization Manage a utilization attribute
Configuring the cluster:
    Cluster-wide properties: property
    Resource defaults: rsc_defaults
    Cluster resources:
        primitive
        group
        clone
        ms/master
    Constraints:
        location
        colocation
        order
Example: a highly available httpd service
    Component resources: vip, httpd, [filesystem]
        vip: IPaddr, IPaddr2
        httpd: the systemd httpd unit file
        filesystem: Filesystem
    Constraints:
        colocation, group
        order
Shared storage:
    Centralized storage:
        NAS: Network Attached Storage, file-level
            file servers: NFS, CIFS
        SAN: Storage Area Network, block-level
            FC SAN
            IP SAN
            ...
        Mounting SAN storage on several nodes at once requires a cluster filesystem (DLM-based):
            GFS2: Global File System
            OCFS2: Oracle Cluster File System
    Distributed storage:
        GlusterFS, Ceph, MogileFS, MooseFS, HDFS
crm can be used either non-interactively from the command line or interactively. Check the current status from the command line:
~]# crm_mon
Stack: corosync
Current DC: node1.com (version 1.1.15-11.el7_3.2-e174ec8) - partition with quorum
Last updated: Tue Jan 17 14:53:34 2017          Last change: Tue Jan 17 14:51:11 2017 by hacluster via crmd on node1.com
2 nodes and 0 resources configured
Online: [ node1.com node2.com ]
No active resources
Check interactively
~]# crm
crm(live)# status
Stack: corosync
Current DC: node1.com (version 1.1.15-11.el7_3.2-e174ec8) - partition with quorum
Last updated: Tue Jan 17 14:54:40 2017          Last change: Tue Jan 17 14:51:11 2017 by hacluster via crmd on node1.com
2 nodes and 0 resources configured
Online: [ node1.com node2.com ]
No resources
crm(live)# ra classes        list the available RA classes
lsb
ocf / .isolation heartbeat openstack pacemaker
service
systemd
crm(live)ra# list lsb        use list to show the agents provided by a class
netconsole network
crm(live)ra# list systemd
NetworkManager NetworkManager-wait-online
auditd brandbot
corosync cpupower
crond dbus
display-manager dm-event
dracut-shutdown emergency
exim getty@tty1
ip6tables iptables
irqbalance kdump
kmod-static-nodes ldconfig
lvm2-activation lvm2-lvmetad
lvm2-lvmpolld lvm2-monitor
lvm2-pvscan@8:2 microcode
network pacemaker
plymouth-quit plymouth-quit-wait
plymouth-read-write plymouth-start
polkit postfix
rc-local rescue
rhel-autorelabel rhel-autorelabel-mark
rhel-configure rhel-dmesg
rhel-import-state rhel-loadmodules
rhel-readonly rsyslog
sendmail sshd
sshd-keygen syslog
systemd-ask-password-console systemd-ask-password-plymouth
systemd-ask-password-wall systemd-binfmt
systemd-firstboot systemd-fsck-root
systemd-hwdb-update systemd-initctl
systemd-journal-catalog-update systemd-journal-flush
systemd-journald systemd-logind
systemd-machine-id-commit systemd-modules-load
systemd-random-seed systemd-random-seed-load
systemd-readahead-collect systemd-readahead-done
systemd-readahead-replay systemd-reboot
systemd-remount-fs systemd-rfkill@rfkill2
systemd-shutdownd systemd-sysctl
systemd-sysusers systemd-tmpfiles-clean
systemd-tmpfiles-setup systemd-tmpfiles-setup-dev
systemd-udev-trigger systemd-udevd
systemd-update-done systemd-update-utmp
systemd-update-utmp-runlevel systemd-user-sessions
systemd-vconsole-setup tuned
wpa_supplicant
crm(live)ra# list ocf
CTDB ClusterMon Delay
Dummy Filesystem HealthCPU
HealthSMART IPaddr IPaddr2
IPsrcaddr LVM MailTo
NovaEvacuate Route SendArp
Squid Stateful SysInfo
SystemHealth VirtualDomain Xinetd
apache clvm conntrackd
controld db2 dhcpd
docker ethmonitor exportfs
galera garbd iSCSILogicalUnit
iSCSITarget iface-vlan mysql
nagios named nfsnotify
nfsserver nginx nova-compute-wait
oracle oralsnr pgsql
ping pingd portblock
postfix rabbitmq-cluster redis
remote rsyncd slapd
symlink tomcat
crm(live)ra# list ocf heartbeat
CTDB Delay Dummy
Filesystem IPaddr IPaddr2
IPsrcaddr LVM MailTo
Route SendArp Squid
VirtualDomain Xinetd apache
clvm conntrackd db2
dhcpd docker ethmonitor
exportfs galera garbd
iSCSILogicalUnit iSCSITarget iface-vlan
mysql nagios named
nfsnotify nfsserver nginx
oracle oralsnr pgsql
portblock postfix rabbitmq-cluster
redis rsyncd slapd
symlink tomcat
crm(live)ra# list ocf pacemaker
ClusterMon Dummy HealthCPU HealthSMART
Stateful SysInfo SystemHealth controld
ping pingd remote
crm(live)ra# list ocf openstack
NovaEvacuate nova-compute-wait
crm(live)ra# info ocf:heartbeat:IPaddr        use info to see an agent's parameters and usage
crm(live)node# ls        node mode: controls node actions such as standby and bringing nodes online
.. help fence
show attribute back
cd ready status-attr
quit end utilization
exit ls maintenance
online bye ?
status clearstate standby
list up server
delete
crm(live)node# standby        with no argument, the current node is put into standby
crm(live)node# cd
crm(live)# status
Stack: corosync
Current DC: node1.com (version 1.1.15-11.el7_3.2-e174ec8) - partition with quorum
Last updated: Tue Jan 17 15:07:44 2017          Last change: Tue Jan 17 15:07:40 2017 by root via crm_attribute on node1.com
2 nodes and 0 resources configured
Node node1.com: standby        node1 is now in standby
Online: [ node2.com ]
No resources
crm(live)# node
crm(live)node# online        bring the node back online
crm(live)node# cd
crm(live)# status
Stack: corosync
Current DC: node1.com (version 1.1.15-11.el7_3.2-e174ec8) - partition with quorum
Last updated: Tue Jan 17 15:08:36 2017          Last change: Tue Jan 17 15:08:33 2017 by root via crm_attribute on node1.com
2 nodes and 0 resources configured
Online: [ node1.com node2.com ]
No resources
These operations can also be run directly from the top level:
crm(live)# node online node2.com
crm(live)# status
Stack: corosync
Current DC: node1.com (version 1.1.15-11.el7_3.2-e174ec8) - partition with quorum
Last updated / Last change: ... 09:48:17 2017 by root via crm_attribute on node1.com
2 nodes and 0 resources configured
Online: [ node1.com node2.com ]
No resources
Use configure to enter configuration mode
crm(live)# configure
crm(live)configure# ls
.. get_property cibstatus
primitive set validate_all
help rsc_template ptest
back cd default-timeouts
erase validate-all rsctest
rename op_defaults modgroup
xml quit upgrade
group graph load
master location template
save collocation rm
bye clone ?
ls node default_timeouts
exit acl_target colocation
fencing_topology assist alert
ra schema user
simulate rsc_ticket end
role rsc_defaults monitor
cib property resource
edit show up
refresh order filter
get-property tag ms
verify commit history
delete
location sets a resource's preference for particular nodes (a per-node score)
property sets cluster-wide (global) properties
crm(live)# configure
crm(live)configure# property        press Tab here to list the available properties and their usage
batch-limit= node-health-strategy=
cluster-delay= node-health-yellow=
cluster-recheck-interval= notification-agent=
concurrent-fencing= notification-recipient=
crmd-transition-delay= pe-error-series-max=
dc-deadtime= pe-input-series-max=
default-action-timeout= pe-warn-series-max=
default-resource-stickiness= placement-strategy=
election-timeout= remove-after-stop=
enable-acl= shutdown-escalation=
enable-startup-probes= start-failure-is-fatal=
have-watchdog= startup-fencing=
is-managed-default= stonith-action=
load-threshold= stonith-enabled=
maintenance-mode= stonith-timeout=
migration-limit= stonith-watchdog-timeout=
no-quorum-policy= stop-all-resources=
node-action-limit= stop-orphan-actions=
node-health-green= stop-orphan-resources=
node-health-red= symmetric-cluster=
crm(live)configure# property no-quorum-policy=
no-quorum-policy (enum, [stop]): What to do when the cluster does not have quorum
    Allowed values: stop, freeze, ignore, suicide
crm(live)configure# property no-quorum-policy=stop
crm(live)configure# show        display the current configuration
node 1: node1.com
node 2: node2.com \
attributes standby=off
property cib-bootstrap-options: \
have-watchdog=false \
dc-version=1.1.15-11.el7_3.2-e174ec8 \
cluster-infrastructure=corosync \
cluster-name=mycluster \
no-quorum-policy=stop
Disable STONITH (there is no fencing device in this lab)
crm(live)configure# property stonith-enabled=false
Define the cluster IP
crm(live)configure# primitive webip ocf:heartbeat:IPaddr params ip=192.168.150.80
Validate the configuration with verify
crm(live)configure# verify
Confirm the configuration with commit to make it take effect
crm(live)configure# commit
crm(live)configure# show        display the configuration
node 1: node1.com \
attributes standby=off
node 2: node2.com
primitive webip IPaddr \
params ip=192.168.150.80
property cib-bootstrap-options: \
have-watchdog=false \
dc-version=1.1.15-11.el7_3.2-e174ec8 \
cluster-infrastructure=corosync \
cluster-name=mycluster \
no-quorum-policy=stop \
stonith-enabled=false
Check the status
crm(live)# status
Stack: corosync
Current DC: node1.com (version 1.1.15-11.el7_3.2-e174ec8) - partition with quorum
Last updated: Tue Jan 17 15:23:58 2017          Last change: Tue Jan 17 15:23:55 2017 by root via cibadmin on node1.com
2 nodes and 1 resource configured
Online: [ node1.com node2.com ]
Full list of resources:
webip (ocf::heartbeat:IPaddr): Started node1.com        the webip cluster resource is now on node1
Verify on node1 with ip addr:
node1 ~]# ip addr
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN
link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
inet 127.0.0.1/8 scope host lo
valid_lft forever preferred_lft forever
inet6 ::1/128 scope host
valid_lft forever preferred_lft forever
2: eno16777736: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP qlen 1000
    link/ether 00:0c:29:98:ad:a4 brd ff:ff:ff:ff:ff:ff
inet 192.168.150.137/24 brd 192.168.150.255 scope global eno16777736
valid_lft forever preferred_lft forever
inet 192.168.150.80/24 brd 192.168.150.255 scope global secondary eno16777736
valid_lft forever preferred_lft forever
inet6 fe80::20c:29ff:fe98:ada4/64 scope link
valid_lft forever preferred_lft forever
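Incidentally, if the VIP ever needs an explicit netmask or must be pinned to a particular NIC, ocf:heartbeat:IPaddr2 accepts those as parameters (a sketch only, not committed in this lab; eno16777736 is the interface name from the ip addr output above):
crm(live)configure# primitive webip ocf:heartbeat:IPaddr2 params ip=192.168.150.80 cidr_netmask=24 nic=eno16777736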
Switch node1 to standby and check whether the resource migrates to node2
[root@node1 ~]# crm node standby
[root@node1 ~]# crm status
Stack: corosync
Current DC: node1.com (version 1.1.15-11.el7_3.2-e174ec8) - partition with quorum
Last updated: Tue Jan 17 15:26:43 2017          Last change: Tue Jan 17 15:26:40 2017 by root via crm_attribute on node1.com
2 nodes and 1 resource configured
Node node1.com: standby
Online: [ node2.com ]
Full list of resources:
webip (ocf::heartbeat:IPaddr): Started node2.com        the webip resource has migrated to node2
Verify:
node2 ~]# ip addr
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN
link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
inet 127.0.0.1/8 scope host lo
valid_lft forever preferred_lft forever
inet6 ::1/128 scope host
valid_lft forever preferred_lft forever
2: eno16777736: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP qlen 1000
    link/ether 00:0c:29:f3:13:56 brd ff:ff:ff:ff:ff:ff
inet 192.168.150.138/24 brd 192.168.150.255 scope global eno16777736
valid_lft forever preferred_lft forever
inet 192.168.150.80/24 brd 192.168.150.255 scope global secondary eno16777736
valid_lft forever preferred_lft forever
inet6 fe80::20c:29ff:fef3:1356/64 scope link
valid_lft forever preferred_lft forever
Bring node1 back online; since no location preference or stickiness has been configured, the resource stays on node2
[root@node1 ~]# crm node online
[root@node1 ~]# crm status
Stack: corosync
Current DC: node1.com (version 1.1.15-11.el7_3.2-e174ec8) - partition with quorum
Last updated: Tue Jan 17 15:30:05 2017          Last change: Tue Jan 17 15:30:02 2017 by root via crm_attribute on node1.com
2 nodes and 1 resource configured
Online: [ node1.com node2.com ]
Full list of resources:
webip (ocf::heartbeat:IPaddr): Started node2.com
Resources can also be moved explicitly with migrate
crm(live)resource# migrate webip node1.com
INFO: Move constraint created for webip to node2.com
crm(live)resource# cd
crm(live)# status
Stack: corosync
Current DC: node1.com (version 1.1.15-11.el7_3.2-e174ec8) - partition with quorum
Last updated: Tue Jan 17 15:37:00 2017          Last change: Tue Jan 17 15:36:49 2017 by root via crm_resource on node1.com
2 nodes and 1 resource configured
Online: [ node1.com node2.com ]
Full list of resources:
webip (ocf::heartbeat:IPaddr): Started node1.com
In resource mode resources can be stopped, started and deleted
crm(live)resource# stop webip
crm(live)resource# cd
crm(live)# status
Stack: corosync
Current DC: node1.com (version 1.1.15-11.el7_3.2-e174ec8) - partition with quorum
Last updated: Tue Jan 17 15:38:52 2017          Last change: Tue Jan 17 15:38:50 2017 by root via cibadmin on node1.com
2 nodes and 1 resource configured: 1 resource DISABLED and 0 BLOCKED from being started due to failures
Online: [ node1.com node2.com ]
Full list of resources:
webip (ocf::heartbeat:IPaddr): Stopped (disabled)
crm(live)# resource
crm(live)resource# start webip
crm(live)resource# cd
crm(live)# status
Stack: corosync
Current DC: node1.com (version 1.1.15-11.el7_3.2-e174ec8) - partition with quorum
Last updated: Tue Jan 17 15:39:03 2017          Last change: Tue Jan 17 15:39:00 2017 by root via cibadmin on node1.com
2 nodes and 1 resource configured
Online: [ node1.com node2.com ]
Full list of resources:
webip (ocf::heartbeat:IPaddr): Started node1.com
5. Configure the clustered httpd service
Install httpd on each node with Ansible:
~]# ansible all -m yum -a "name=httpd state=present"
Create a test page on each node (node1's page contains node1, node2's contains node2):
vim /var/www/html/index.html
node1
node2
Enable the httpd unit in systemd
~]# ansible all -m shell -a 'systemctl enable httpd.service'
192.168.150.138 | SUCCESS | rc=0 >>
Created symlink from /etc/systemd/system/multi-user.target.wants/httpd.service to /usr/lib/systemd/system/httpd.service.
192.168.150.137 | SUCCESS | rc=0 >>
Created symlink from /etc/systemd/system/multi-user.target.wants/httpd.service to /usr/lib/systemd/system/httpd.service.
Add the httpd resource with crmsh
[root@node1 ~]# crm
crm(live)# ra
crm(live)ra# list systemd        check that httpd appears in the systemd class
View the advisory default properties of the httpd resource agent:
crm(live)ra# info systemd:httpd
systemd unit file for httpd (systemd:httpd)
The Apache HTTP Server
Operations' defaults (advisory minimum):
start timeout=100
stop timeout=100
status timeout=100
monitor timeout=100 interval=60
Configure the resource
crm(live)# configure
crm(live)configure# primitive webserver systemd:httpd op start timeout=100 op stop timeout=100
crm(live)configure# verify
crm(live)configure# commit
crm(live)# status
Stack: corosync
Current DC: node1.com (version 1.1.15-11.el7_3.2-e174ec8) - partition with quorum
Last updated: Tue Jan 17 15:54:45 2017          Last change: Tue Jan 17 15:54:32 2017 by root via cibadmin on node1.com
2 nodes and 2 resources configured
Online: [ node1.com node2.com ]
Full list of resources:
webip (ocf::heartbeat:IPaddr): Started node1.com
webserver (systemd:httpd): Started node2.com
At this point the two resources have been started on different nodes, which is obviously a problem for an httpd cluster,
so we define a resource group to bind them together.
crm(live)configure# group webservice webip webserver        the order of resources in the group is the order in which they start
INFO: modified location:cli-prefer-webip from webip to webservice
crm(live)configure# show
node 1: node1.com \
attributes standby=off
node 2: node2.com \
attributes standby=off
primitive webip IPaddr \
params ip=192.168.150.80 \
meta target-role=Started
primitive webserver systemd:httpd \
op start timeout=100 interval=0 \
op stop timeout=100 interval=0
group webservice webip webserver
location cli-prefer-webip webservice role=Started inf: node1.com
property cib-bootstrap-options: \
have-watchdog=false \
dc-version=1.1.15-11.el7_3.2-e174ec8 \
cluster-infrastructure=corosync \
cluster-name=mycluster \
no-quorum-policy=stop \
stonith-enabled=false
crm(live)configure# verify
crm(live)configure# commit
crm(live)configure# cd
crm(live)# status
Stack: corosync
Current DC: node1.com (version 1.1.15-11.el7_3.2-e174ec8) - partition with quorum
Last updated: Tue Jan 17 15:58:30 2017          Last change: Tue Jan 17 15:58:24 2017 by root via cibadmin on node1.com
2 nodes and 2 resources configured
Online: [ node1.com node2.com ]
Full list of resources:
Verify cluster failover:
Put node1 into standby
[root@node1 ~]# crm node standby
[root@node1 ~]# crm status
Stack: corosync
Current DC: node1.com (version 1.1.15-11.el7_3.2-e174ec8) - partition with quorum
Last updated: Tue Jan 17 15:59:18 2017          Last change: Tue Jan 17 15:59:15 2017 by root via crm_attribute on node1.com
2 nodes and 2 resources configured
Node node1.com: standby
Online: [ node2.com ]
Full list of resources:
Resource Group: webservice
webip (ocf::heartbeat:IPaddr): Started node2.com
webserver (systemd:httpd): Stopped
[root@node1 ~]# crm status
Stack: corosync
Current DC: node1.com (version 1.1.15-11.el7_3.2-e174ec8) - partition with quorum
Last updated: Tue Jan 17 15:59:21 2017          Last change: Tue Jan 17 15:59:15 2017 by root via crm_attribute on node1.com
2 nodes and 2 resources configured
Node node1.com: standby
Online: [ node2.com ]
Full list of resources:
Resource Group: webservice
webip (ocf::heartbeat:IPaddr): Started node2.com
webserver (systemd:httpd): Started node2.com
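As a quick functional check, the VIP can be queried from any host that reaches 192.168.150.0/24 (a sketch; the expected body is node2's test page, since the group is currently running there):
~]# curl http://192.168.150.80
node2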
6. Add a shared storage resource
Use NFS as the shared storage to simulate a shared-storage cluster.
Configure NFS on the storage host:
yum -y install nfs-utils
[root@localhost ~]# mkdir /www/html -pv
mkdir: created directory '/www'
mkdir: created directory '/www/html'
[root@localhost ~]# vim /etc/exports
[root@localhost ~]# cat /etc/exports
/www/html 192.168.150.0/24(rw,no_root_squash)
[root@localhost ~]# systemctl start nfs.service
[root@localhost ~]# ss -tnl
State Recv-Q Send-Q Local Address:Port Peer Address:Port
LISTEN 0 64 *:39439 *:*
LISTEN 0 128 *:111 *:*
LISTEN 0 128 *:20048 *:*
LISTEN 0 128 *:33073 *:*
LISTEN 0 128 *:22 *:*
LISTEN 0 100 127.0.0.1:25 *:*
LISTEN 0 64 *:2049 *:*
LISTEN 0 128 :::111 :::*
LISTEN 0 128 :::20048 :::*
LISTEN 0 128 :::58611 :::*
LISTEN 0 128 :::22 :::*
LISTEN 0 100 ::1:25 :::*
LISTEN 0 64 :::2049 :::*
LISTEN 0 64 :::59877 :::*
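Before mounting from the nodes, the export can be sanity-checked with showmount (a sketch; run it on the storage host itself or on any host that has nfs-utils installed):
[root@localhost ~]# showmount -e 192.168.150.139
Export list for 192.168.150.139:
/www/html 192.168.150.0/24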
Test mounting the NFS export on each node with Ansible
~]# ansible all -m yum -a "name=nfs-utils state=present"
~]# ansible all -m shell -a 'mount -t nfs 192.168.150.139:/www/html /var/www/html'
192.168.150.137 | SUCCESS | rc=0 >>
192.168.150.138 | SUCCESS | rc=0 >>
Confirm on a node
~]# df -h
Filesystem                 Size  Used Avail Use% Mounted on
/dev/mapper/centos-root 28G 8.5G 20G 31% /
devtmpfs 479M 0 479M 0% /dev
tmpfs 489M 54M 436M 11% /dev/shm
tmpfs 489M 6.8M 483M 2% /run
tmpfs 489M 0 489M 0% /sys/fs/cgroup
/dev/sda1 497M 125M 373M 25% /boot
tmpfs 98M 0 98M 0% /run/user/0
192.168.150.139:/www/html 28G 8.4G 20G 31% /var/www/html
Unmount
~]# ansible all -m shell -a 'umount /var/www/html'
192.168.150.138 | SUCCESS | rc=0 >>
192.168.150.137 | SUCCESS | rc=0 >>
Configure the storage (Filesystem) resource
[root@node1 ~]# crm
crm(live)# configure
crm(live)configure# primitive webstore ocf:heartbeat:Filesystem params device="192.168.150.139:/www/html" directory="/var/www/html" fstype=nfs op start timeout=60 op stop timeout=60
crm(live)configure# verify
crm(live)configure# cd
There are changes pending. Do you want to commit them (y/n)? y
crm(live)# resource
crm(live)resource# stop webservice
Do you want to override target-role for child resource webip (y/n)? y
crm(live)resource# cd
crm(live)# configure
crm(live)configure# delete webservice
INFO: modified location:cli-prefer-webip from webservice to webip
crm(live)configure# show
node 1: node1.com \
attributes standby=off
node 2: node2.com \
attributes standby=off
primitive webip IPaddr \
params ip=192.168.150.80
primitive webserver systemd:httpd \
op start timeout=100 interval=0 \
op stop timeout=100 interval=0
primitive webstore Filesystem \
params device="192.168.150.139:/www/html" directory="/var/www/html" fstype=nfs \
op start timeout=60 interval=0 \
op stop timeout=60 interval=0
location cli-prefer-webip webip role=Started inf: node1.com
property cib-bootstrap-options: \
have-watchdog=false \
dc-version=1.1.15-11.el7_3.2-e174ec8 \
cluster-infrastructure=corosync \
cluster-name=mycluster \
no-quorum-policy=stop \
stonith-enabled=false
crm(live)configure# group webservice webip webstore webserver
INFO: modified location:cli-prefer-webip from webip to webservice
crm(live)configure# verify
crm(live)configure# commit
Check the status
crm(live)# status
Stack: corosync
Current DC: node1.com (version 1.1.15-11.el7_3.2-e174ec8) - partition with quorum
Last updated: Tue Jan 17 16:22:12 2017          Last change: Tue Jan 17 16:21:44 2017 by root via cibadmin on node1.com
2 nodes and 3 resources configured
Online: [ node1.com node2.com ]
Full list of resources:
Resource Group: webservice
webip (ocf::heartbeat:IPaddr): Started node1.com
webstore (ocf::heartbeat:Filesystem): Started node1.com
webserver (systemd:httpd): Started node1.com
Verify the cluster
[root@node1 ~]# vim /var/www/html/index.html
[root@node1 ~]# cat /var/www/html/index.html
<h1>nfs server</h1>
[root@node1 ~]# curl http://192.168.150.80
<h1>nfs server</h1>
Put node1 into standby
[root@node1 ~]# crm node standby
[root@node1 ~]# crm status
Stack: corosync
Current DC: node1.com (version 1.1.15-11.el7_3.2-e174ec8) - partition with quorum
Last updated: Tue Jan 17 16:24:47 2017          Last change: Tue Jan 17 16:24:44 2017 by root via crm_attribute on node1.com
2 nodes and 3 resources configured
Node node1.com: standby
Online: [ node2.com ]
Full list of resources:
Resource Group: webservice
webip (ocf::heartbeat:IPaddr): Started node2.com
webstore (ocf::heartbeat:Filesystem): Started node2.com
webserver (systemd:httpd): Stopped
[root@node1 ~]# curl http://192.168.150.80        still reachable; the cluster is working
<h1>nfs server</h1>
[root@node1 ~]# crm status
Stack: corosync
Current DC: node1.com (version 1.1.15-11.el7_3.2-e174ec8) - partition with quorum
Last updated: Tue Jan 17 16:24:59 2017          Last change: Tue Jan 17 16:24:44 2017 by root via crm_attribute on node1.com
2 nodes and 3 resources configured
Node node1.com: standby
Online: [ node2.com ]
Full list of resources:
Resource Group: webservice
webip (ocf::heartbeat:IPaddr): Started node2.com
webstore (ocf::heartbeat:Filesystem): Started node2.com
webserver (systemd:httpd): Started node2.com
Bring node1 back online
crm node online
[root@node1 ~]# crm status
Stack: corosync
Current DC: node1.com (version 1.1.15-11.el7_3.2-e174ec8) - partition with quorum
Last updated: Tue Jan 17 16:26:16 2017          Last change: Tue Jan 17 16:26:11 2017 by root via crm_attribute on node1.com
2 nodes and 3 resources configured
Online: [ node1.com node2.com ]
Full list of resources:
Resource Group: webservice
webip (ocf::heartbeat:IPaddr): Started node1.com
webstore (ocf::heartbeat:Filesystem): Started node1.com
webserver (systemd:httpd): Started node1.com
7. Define and test location constraints
Delete the group defined earlier and set a per-resource location preference
crm(live)# resource
crm(live)resource# stop webservice
crm(live)# configure
crm(live)configure# delete webservice
INFO: modified location:cli-prefer-webip from webservice to webip
crm(live)configure# commit
Define the location constraint
crm(live)configure# location webip_pre_node1 webip 50: node1.com
crm(live)configure# show
node 1: node1.com \
attributes standby=off
node 2: node2.com \
attributes standby=off
primitive webip IPaddr \
params ip=192.168.150.80
primitive webserver systemd:httpd \
op start timeout=100 interval=0 \
op stop timeout=100 interval=0
primitive webstore Filesystem \
params device="192.168.150.139:/www/html" directory="/var/www/html" fstype=nfs \
op start timeout=60 interval=0 \
op stop timeout=60 interval=0
location cli-prefer-webip webip role=Started inf: node1.com
location webip_pre_node1 webip 50: node1.com
property cib-bootstrap-options: \
have-watchdog=false \
dc-version=1.1.15-11.el7_3.2-e174ec8 \
cluster-infrastructure=corosync \
cluster-name=mycluster \
no-quorum-policy=stop \
stonith-enabled=false
crm(live)configure# verify
crm(live)configure# commit
Check the status
crm(live)# status
Stack: corosync
Current DC: node1.com (version 1.1.15-11.el7_3.2-e174ec8) - partition with quorum
Last updated: Tue Jan 17 16:32:58 2017          Last change: Tue Jan 17 16:31:44 2017 by root via cibadmin on node1.com
2 nodes and 3 resources configured
Online: [ node1.com node2.com ]
Full list of resources:
webip (ocf::heartbeat:IPaddr): Started node1.com
webstore (ocf::heartbeat:Filesystem): Started node2.com
webserver (systemd:httpd): Started node1.com
Check the default stickiness; it defaults to 0
crm(live)# configure
crm(live)configure# property
batch-limit= node-health-strategy=
cluster-delay= node-health-yellow=
cluster-recheck-interval= notification-agent=
concurrent-fencing= notification-recipient=
crmd-transition-delay= pe-error-series-max=
dc-deadtime= pe-input-series-max=
default-action-timeout= pe-warn-series-max=
default-resource-stickiness= placement-strategy=
election-timeout= remove-after-stop=
enable-acl= shutdown-escalation=
enable-startup-probes= start-failure-is-fatal=
have-watchdog= startup-fencing=
is-managed-default= stonith-action=
load-threshold= stonith-enabled=
maintenance-mode= stonith-timeout=
migration-limit= stonith-watchdog-timeout=
no-quorum-policy= stop-all-resources=
node-action-limit= stop-orphan-actions=
node-health-green= stop-orphan-resources=
node-health-red= symmetric-cluster=
crm(live)configure# property default-resource-stickiness=
default-resource-stickiness (integer, [0]):
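If you want resources to stay where they are after a failover instead of drifting back as scores change, the stickiness can be raised through rsc_defaults (a sketch; 100 is an arbitrary example value, not used in this lab):
crm(live)configure# rsc_defaults resource-stickiness=100
crm(live)configure# verify
crm(live)configure# commit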
Note:
a constraint created earlier (by migrate) pins webip to node1 with an infinite score; delete it before testing.
location cli-prefer-webip webip role=Started inf: node1.com        inf means an infinite score
In configure mode you can run edit, which opens a vim-like editor where the configuration can be modified by hand.
crm(live)configure# verify
crm(live)configure# commit
Now define a location constraint giving node2 a higher score than node1 and test:
location webip_pre_node2 webip 100: node2.com
crm(live)# status
Stack: corosync
Current DC: node1.com (version 1.1.15-11.el7_3.2-e174ec8) - partition with quorum
Last updated: Tue Jan 17 21:12:40 2017          Last change: Tue Jan 17 21:11:25 2017 by root via cibadmin on node1.com
2 nodes and 3 resources configured
Online: [ node1.com node2.com ]
Full list of resources:
webip (ocf::heartbeat:IPaddr): Started node2.com
webstore (ocf::heartbeat:Filesystem): Started node1.com
webserver (systemd:httpd): Started node1.com
The webip resource has now migrated to node2.
8. Define and test colocation constraints
crm(live)# configure
crm(live)configure# colocation webserver_with_webip inf: webserver webip        defines an affinity between the two resources: they must run on the same node
crm(live)configure# verify
crm(live)configure# commit
crm(live)configure# cd
crm(live)# status
Stack: corosync
Current DC: node1.com (version 1.1.15-11.el7_3.2-e174ec8) - partition with quorum
Last updated: Tue Jan 17 21:16:11 2017          Last change: Tue Jan 17 21:16:09 2017 by root via cibadmin on node1.com
2 nodes and 3 resources configured
Online: [ node1.com node2.com ]
Full list of resources:
webip (ocf::heartbeat:IPaddr): Started node2.com
webstore (ocf::heartbeat:Filesystem): Started node1.com
webserver (systemd:httpd): Started node1.com
crm(live)# status
Stack: corosync
Current DC: node1.com (version 1.1.15-11.el7_3.2-e174ec8) - partition with quorum
Last updated: Tue Jan 17 21:16:50 2017          Last change: Tue Jan 17 21:16:09 2017 by root via cibadmin on node1.com
2 nodes and 3 resources configured
Online: [ node1.com node2.com ]
Full list of resources:
webip (ocf::heartbeat:IPaddr): Started node2.com
webstore (ocf::heartbeat:Filesystem): Started node1.com
webserver (systemd:httpd): Started node2.com
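To see the scores behind this placement, crmsh can print them with the scores command listed in the resource menu earlier (a sketch; the output is the allocation score table produced by the policy engine):
crm(live)resource# scores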
9. Ordering constraints (order)
crm(live)configure# order webip_bef_webstore_bef_webserver mandatory: webip webstore webserver        enforce the resource start order
crm(live)configure# verify
crm(live)configure# commit
crm(live)configure# cd
crm(live)# status
Stack: corosync
Current DC: node1.com (version 1.1.15-11.el7_3.2-e174ec8) - partition with quorum
Last updated: Tue Jan 17 22:08:26 2017          Last change: Tue Jan 17 22:08:24 2017 by root via cibadmin on node1.com
2 nodes and 3 resources configured
Online: [ node1.com node2.com ]
Full list of resources:
webip (ocf::heartbeat:IPaddr): Started node2.com
webstore (ocf::heartbeat:Filesystem): Started node1.com
webserver (systemd:httpd): Started node2.com
crm(live)# status
Stack: corosync
Current DC: node1.com (version 1.1.15-11.el7_3.2-e174ec8) - partition with quorum
Last updated: Tue Jan 17 22:08:39 2017          Last change: Tue Jan 17 22:08:24 2017 by root via cibadmin on node1.com
2 nodes and 3 resources configured
Online: [ node1.com node2.com ]
Full list of resources:
webip (ocf::heartbeat:IPaddr): Started node2.com
webstore (ocf::heartbeat:Filesystem): Started node1.com
webserver (systemd:httpd): Started node2.com
crm(live)# status
Stack: corosync
Current DC: node1.com (version 1.1.15-11.el7_3.2-e174ec8) - partition with quorum
Last updated: Tue Jan 17 22:08:40 2017          Last change: Tue Jan 17 22:08:24 2017 by root via cibadmin on node1.com
2 nodes and 3 resources configured
Online: [ node1.com node2.com ]
Full list of resources:
webip (ocf::heartbeat:IPaddr): Started node2.com
webstore (ocf::heartbeat:Filesystem): Started node1.com
webserver (systemd:httpd): Started node2.com
10. Define a resource with monitoring
In a two-node cluster, losing quorum can leave resources stuck and unable to fail over. There are several ways to deal with this:
add a ping node (see the sketch after this list)
add a quorum disk
use an odd number of cluster nodes
simply ignore the loss of quorum; if you choose this approach, the resources must be monitored
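As an illustration of the ping-node option (a sketch only, not applied in this lab; 192.168.150.2 is a placeholder for a reachable gateway address), a ping resource is defined and cloned to every node; a location rule on the resulting pingd attribute can then keep resources off nodes that lose connectivity:
crm(live)configure# primitive pnode ocf:pacemaker:ping params host_list=192.168.150.2 multiplier=100 op monitor interval=10s timeout=60s
crm(live)configure# clone pnode_clone pnode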
crm(live)configure# property no-quorum-policy=
no-quorum-policy (enum, [stop]): What to do when the cluster does not have quorum
    Allowed values: stop, freeze, ignore, suicide
crm(live)configure# property no-quorum-policy=ignore
crm(live)configure# verify
crm(live)configure# commit
Define resource monitoring
crm(live)configure# primitive webserver systemd:httpd op start timeout=100 op stop timeout=100 op monitor interval=60 timeout=100
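If the webserver primitive already exists, the monitor operation can instead be attached to it with the configure monitor command listed earlier, rather than redefining the primitive (a sketch; 60s interval, 100s timeout):
crm(live)configure# monitor webserver 60s:100s
crm(live)configure# verify
crm(live)configure# commit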
Manually kill the httpd service
[root@node1 ~]# killall httpd
[root@node1 ~]# crm status
Stack: corosync
Current DC: node1.com (version 1.1.15-11.el7_3.2-e174ec8) - partition with quorum
Last updated: Tue Jan 17 22:26:31 2017 Last change: Tue Jan 17 22:23:51 2017 by root via cibadmin on node2.com
2 nodes and 3 resources configured
Online: [ node1.com node2.com ]
Full list of resources:
webip (ocf::heartbeat:IPaddr): Started node2.com
webstore (ocf::heartbeat:Filesystem): Started node1.com
webserver (systemd:httpd): Started node1.com
root@node1 ~]# ss -tnl
State Recv-Q Send-Q Local Address:Port Peer Address:Port
LISTEN 0 128 *:111 *:*
LISTEN 0 128 *:22 *:*
LISTEN 0 100 127.0.0.1:25 *:*
LISTEN 0 64 *:43550 *:*
LISTEN 0 128 :::111 :::*
LISTEN 0 128 :::22 :::*
LISTEN 0 100 ::1:25 :::*
LISTEN 0 64 :::36414 :::*
After about 60 seconds the monitor detects the failure and httpd is started again automatically
root@node1 ~]# ss -tnl
(output truncated in the original; port 80 is listening again)
[root@node1 ~]# crm status
Stack: corosync
Current DC: node1.com (version 1.1.15-11.el7_3.2-e174ec8) - partition with quorum
Last updated: Tue Jan 17 22:30:24 2017          Last change: Tue Jan 17 22:23:51 2017 by root via cibadmin on node2.com
2 nodes and 3 resources configured
Online: [ node1.com node2.com ]
Full list of resources:
webip (ocf::heartbeat:IPaddr): Started node2.com
webstore (ocf::heartbeat:Filesystem): Started node1.com
webserver (systemd:httpd): Started node1.com
Failed Actions:        with monitoring enabled, the failure is recorded here
* webserver_monitor_60000 on node1.com 'not running' (7): call=66, status=complete, exitreason='none',
    last-rc-change='Tue Jan 17 22:26:53 2017', queued=0ms, exec=0ms
The failure records can be cleared with cleanup:
[root@node1 ~]# crm
crm(live)# resource
crm(live)resource# cleanup webserver
Cleaning up webserver on node1.com, removing fail-count-webserver
Cleaning up webserver on node2.com, removing fail-count-webserver
Waiting for 2 replies from the CRMd.. OK
crm(live)resource# cd
crm(live)# status
Stack: corosync
Current DC: node1.com (version 1.1.15-11.el7_3.2-e174ec8) - partition with quorum
Last updated: Tue Jan 17 22:33:56 2017          Last change: Tue Jan 17 22:33:52 2017 by hacluster via crmd on node2.com
2 nodes and 3 resources configured
Online: [ node1.com node2.com ]
Full list of resources:
webip (ocf::heartbeat:IPaddr): Started node2.com
webstore (ocf::heartbeat:Filesystem): Started node1.com
webserver (systemd:httpd): Started node1.com
Break httpd's configuration on node1 so that httpd can no longer start there, and check whether the resource migrates to node2
~]# mv /etc/httpd/conf/httpd.conf /etc/httpd/conf/httpd.conf.bak
[root@node1 ~]# killall httpd
[root@node1 ~]# ss -tnl
State Recv-Q Send-Q Local Address:Port Peer Address:Port
LISTEN 0 128 *:111 *:*
LISTEN 0 64 *:47028 *:*
LISTEN 0 128 *:22 *:*
LISTEN 0 100 127.0.0.1:25 *:*
LISTEN 0 128 :::111 :::*
LISTEN 0 128 :::22 :::*
LISTEN 0 100 ::1:25 :::*
LISTEN 0 64 :::60901 :::*
Since the resource cannot start on node1, it is started on node2 instead
[root@node2 ~]# ss -tnl
State Recv-Q Send-Q Local Address:Port Peer Address:Port
LISTEN 0 128 *:111 *:*
LISTEN 0 128 *:22 *:*
LISTEN 0 100 127.0.0.1:25 *:*
LISTEN 0 128 :::111 :::*
LISTEN 0 128 :::80 :::*
LISTEN 0 128 :::22 :::*
LISTEN 0 100 ::1:25 :::*
]# crm status
Stack: corosync
Current DC: node1.com (version 1.1.15-11.el7_3.2-e174ec8) - partition with quorum
Last updated: Wed Jan 18 11:03:15 2017          Last change: Wed Jan 18 10:56:07 2017 by root via cibadmin on node1.com
2 nodes and 3 resources configured
Online: [ node1.com node2.com ]
Full list of resources:
webip (ocf::heartbeat:IPaddr): Started node2.com
webstore (ocf::heartbeat:Filesystem): Started node1.com
webserver (systemd:httpd): Started node2.com
Failed Actions:
* webserver_start_0 on node1.com 'not running' (7): call=86, status=complete, exitreason='none',
    last-rc-change='Wed Jan 18 10:59:01 2017', queued=0ms, exec=2106ms
After restoring the httpd configuration, remember to clear the resource's failure records, otherwise the resource cannot be started on that node:
[root@node1 ~]# crm
crm(live)# resource
crm(live)resource# cleanup webserver
11. A highly available LVS director (DR model)
This is implemented with the help of ldirectord.
Deploy ldirectord to both nodes from the Ansible host
~]# ansible all -m copy -a "src=/root/ldirectord-3.9.6-0rc1.1.1.x86_64.rpm des t=/root/ldirectord-3.9.6-0rc1.1.1.x86_64.rpm"
~]# ansible all -m shell -a 'yum -y install ldirectord-3.9.6-0rc1.1.1.x86_ 64.rpm'
Confirm the installation on the node hosts
[root@node1 ~]# rpm -qa ldirectord
ldirectord-3.9.6-0rc1.1.1.x86_64
Edit the configuration file on the Ansible host and deploy it to the node hosts
yum -y install ldirectord-3.9.6-0rc1.1.1.x86_64.rpm        (on the Ansible host, so the sample config is available)
~]# cp /usr/share/doc/ldirectord-3.9.6/ldirectord.cf /etc/ha.d/
~]# cd /etc/ha.d/
ha.d]# vim ldirectord.cf
ha.d]# grep -v "^#" ldirectord.cf | grep -v "^$"
checktimeout=3
checkinterval=1
autoreload=yes
quiescent=no
virtual=192.168.150.81:80                 the VIP
        real=192.168.150.7:80 gate        real server addresses (gate = DR mode)
        real=192.168.150.8:80 gate
        real=192.168.6.6:80 gate
        fallback=127.0.0.1:80 gate        the local sorry server
        service=http                      service to check
        scheduler=rr                      scheduling algorithm
#persistent=600
#netmask=255.255.255.255
protocol=tcp
checktype=negotiate
checkport=80
request="index.html"
receive="Test Page"
ha.d]# ansible all -m copy -a "src=/etc/ha.d/ldirectord.cf dest=/etc/ha.d/ldirectord.cf"
Enable the service in systemd
ha.d]# ansible all -m shell -a 'systemctl enable ldirectord.service'
192.168.150.137 | SUCCESS | rc=0 >>
Created symlink from /etc/systemd/system/multi-user.target.wants/ldirectord.service to /usr/lib/systemd/system/ldirectord.service.
192.168.150.138 | SUCCESS | rc=0 >>
Created symlink from /etc/systemd/system/multi-user.target.wants/ldirectord.service to /usr/lib/systemd/system/ldirectord.service.
Test starting the service on a node
~]# systemctl start ldirectord.service
~]# systemctl status ldirectord.service
● ldirectord.service – Monitor and administer real servers in a LVS cluster of load balanced virtual servers
Loaded: loaded (/usr/lib/systemd/system/ldirectord.service; enabled; vendor preset: disabled)
Active: active (running) since 三 2017-01-18 11:31:21 CST; 9s ago
Process: 17474 ExecStartPost=/usr/bin/touch /var/lock/subsys/ldirectord (code=exited, status=0/SUCCESS)
Process: 17472 ExecStart=/usr/sbin/ldirectord start (code=exited, status=0/SUCCESS)
Main PID: 17476 (ldirectord)
CGroup: /system.slice/ldirectord.service
└─17476 /usr/bin/perl -w /usr/sbin/ldirectord start
~]# ipvsadm -Ln        the director comes up correctly
IP Virtual Server version 1.2.1 (size=4096)
Prot LocalAddress:Port Scheduler Flags
-> RemoteAddress:Port Forward Weight ActiveConn InActConn
TCP 192.168.150.81:80 rr
-> 127.0.0.1:80 Route 1 0 0
Before testing, clear out all configuration from the earlier tests:
stop and cleanup the resources in resource mode, then use edit in configure mode to delete the old definitions.
Define new cluster resources for the VIP and for ldirectord, and put both in the drservice group:
crm(live)configure# primitive vip ocf:heartbeat:IPaddr2 params ip=192.168.150.81
crm(live)configure# primitive director systemd:ldirectord op start timeout=100 op stop timeout=100
crm(live)configure# group drservice vip director
crm(live)configure# verify
crm(live)configure# commit
~]# crm status
Stack: corosync
Current DC: node1.com (version 1.1.15-11.el7_3.2-e174ec8) - partition with quorum
Last updated: Wed Jan 18 11:42:38 2017          Last change: Wed Jan 18 11:42:09 2017 by root via cibadmin on node1.com
2 nodes and 2 resources configured
Online: [ node1.com node2.com ]
Full list of resources:
Resource Group: drservice
vip (ocf::heartbeat:IPaddr2): Started node1.com
director (systemd:ldirectord): Started node1.com
LVS state
~]# ipvsadm -Ln
IP Virtual Server version 1.2.1 (size=4096)
Prot LocalAddress:Port Scheduler Flags
-> RemoteAddress:Port Forward Weight ActiveConn InActConn
TCP 192.168.150.81:80 rr
-> 127.0.0.1:80 Route 1 0 0
The VIP is now up on node1
[root@node1 ~]# ip addr show
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN
link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
inet 127.0.0.1/8 scope host lo
valid_lft forever preferred_lft forever
inet6 ::1/128 scope host
valid_lft forever preferred_lft forever
2: eno16777736: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP qlen 1000
    link/ether 00:0c:29:98:ad:a4 brd ff:ff:ff:ff:ff:ff
inet 192.168.150.137/24 brd 192.168.150.255 scope global eno16777736
valid_lft forever preferred_lft forever
inet 192.168.150.81/24 brd 192.168.150.255 scope global secondary eno16777736
valid_lft forever preferred_lft forever
inet6 fe80::20c:29ff:fe98:ada4/64 scope link
valid_lft forever preferred_lft forever
Put node1 into standby to test
[root@node1 ~]# crm node standby
[root@node1 ~]# crm status        all resources have migrated to node2
Stack: corosync
Current DC: node1.com (version 1.1.15-11.el7_3.2-e174ec8) - partition with quorum
Last updated: Wed Jan 18 11:45:08 2017          Last change: Wed Jan 18 11:44:57 2017 by root via crm_attribute on node1.com
2 nodes and 2 resources configured
Node node1.com: standby
Online: [ node2.com ]
Full list of resources:
Resource Group: drservice
vip (ocf::heartbeat:IPaddr2): Started node2.com
director (systemd:ldirectord): Started node2.com
Now look at the resource group state on node2.
LVS has moved to node2; since the real servers configured here are unreachable, the sorry server is what actually serves:
[root@node2 ~]# ipvsadm -Ln
IP Virtual Server version 1.2.1 (size=4096)
Prot LocalAddress:Port Scheduler Flags
-> RemoteAddress:Port Forward Weight ActiveConn InActConn
TCP 192.168.150.81:80 rr
-> 127.0.0.1:80 Route 1 0 0
[root@node2 ~]# ip addr        the VIP has moved over as well
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN
link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
inet 127.0.0.1/8 scope host lo
valid_lft forever preferred_lft forever
inet6 ::1/128 scope host
valid_lft forever preferred_lft forever
2: eno16777736: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP qlen 1000
    link/ether 00:0c:29:f3:13:56 brd ff:ff:ff:ff:ff:ff
inet 192.168.150.138/24 brd 192.168.150.255 scope global eno16777736
valid_lft forever preferred_lft forever
inet 192.168.150.81/24 brd 192.168.150.255 scope global secondary eno16777736
valid_lft forever preferred_lft forever
inet6 fe80::20c:29ff:fef3:1356/64 scope link
valid_lft forever preferred_lft forever
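For completeness: if the real servers were actually deployed, each of them would need the usual LVS-DR setup, i.e. the VIP bound to lo plus ARP suppression in the kernel (a sketch, not performed in this lab since the real servers here are only placeholders):
~]# echo 1 > /proc/sys/net/ipv4/conf/all/arp_ignore
~]# echo 1 > /proc/sys/net/ipv4/conf/lo/arp_ignore
~]# echo 2 > /proc/sys/net/ipv4/conf/all/arp_announce
~]# echo 2 > /proc/sys/net/ipv4/conf/lo/arp_announce
~]# ip addr add 192.168.150.81/32 dev lo
~]# ip route add 192.168.150.81 dev lo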
Original article by N23-苏州-void. If reproduced, please cite the source: http://www.178linux.com/66815