Saturday, June 22, 2013

Step by Step HA - Cluster Configuration


Hosts File & Network Interface Configuration on Both Nodes:

[root@node01 ~]# cat /etc/hosts
# Do not remove the following line, or various programs
# that require network functionality will fail.
127.0.0.1 localhost.localdomain localhost
192.168.11.11 node01.example.com node01
192.168.11.12 node02.example.com node02
192.168.11.10 santst01.example.com santst01
192.168.111.11 nfsshared.example.com nfsshared

[root@node01 ~]# ifconfig
eth0 Link encap:Ethernet HWaddr 00:0C:29:A0:0B:0E
inet addr:192.168.11.11 Bcast:192.168.11.255 Mask:255.255.255.0
inet6 addr: fe80::20c:29ff:fea0:b0e/64 Scope:Link
UP BROADCAST RUNNING MULTICAST MTU:1500 Metric:1
RX packets:3311 errors:0 dropped:0 overruns:0 frame:0
TX packets:3062 errors:0 dropped:0 overruns:0 carrier:0
collisions:0 txqueuelen:1000
RX bytes:2030237 (1.9 MiB) TX bytes:789280 (770.7 KiB)
Base address:0x2000 Memory:d8920000-d8940000
eth1 Link encap:Ethernet HWaddr 00:0C:29:A0:0B:18
inet addr:192.168.111.11 Bcast:192.168.111.255 Mask:255.255.255.0
inet6 addr: fe80::20c:29ff:fea0:b18/64 Scope:Link
UP BROADCAST RUNNING MULTICAST MTU:1500 Metric:1
RX packets:42 errors:0 dropped:0 overruns:0 frame:0
TX packets:12 errors:0 dropped:0 overruns:0 carrier:0
collisions:0 txqueuelen:1000
RX bytes:11647 (11.3 KiB) TX bytes:720 (720.0 b)
Base address:0x2040 Memory:d8940000-d8960000
lo Link encap:Local Loopback
inet addr:127.0.0.1 Mask:255.0.0.0
inet6 addr: ::1/128 Scope:Host
UP LOOPBACK RUNNING MTU:16436 Metric:1
RX packets:12 errors:0 dropped:0 overruns:0 frame:0
TX packets:12 errors:0 dropped:0 overruns:0 carrier:0
collisions:0 txqueuelen:0
RX bytes:900 (900.0 b) TX bytes:900 (900.0 b)
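
For reference, the second interface (eth1, carried on the 192.168.111.0/24 network here) can be given a static address through its ifcfg file. The following is only a minimal sketch for node01, assuming a static configuration; adjust the device name and addresses to your environment:

[root@node01 ~]# cat /etc/sysconfig/network-scripts/ifcfg-eth1
# Second NIC, used for the 192.168.111.0/24 network (static address)
DEVICE=eth1
BOOTPROTO=static
IPADDR=192.168.111.11
NETMASK=255.255.255.0
ONBOOT=yes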

[root@node02 ~]# cat /etc/hosts
# Do not remove the following line, or various programs
# that require network functionality will fail.
127.0.0.1 localhost.localdomain localhost
192.168.11.11 node01.example.com node01
192.168.11.12 node02.example.com node02
192.168.11.10 santst01.example.com santst01
192.168.111.11 nfsshared.example.com nfsshared

[root@node02 ~]# ifconfig
eth0 Link encap:Ethernet HWaddr 00:0C:29:F2:A5:86
inet addr:192.168.11.12 Bcast:192.168.11.255 Mask:255.255.255.0
inet6 addr: fe80::20c:29ff:fef2:a586/64 Scope:Link
UP BROADCAST RUNNING MULTICAST MTU:1500 Metric:1
RX packets:7151 errors:0 dropped:0 overruns:0 frame:0
TX packets:5472 errors:0 dropped:0 overruns:0 carrier:0
collisions:0 txqueuelen:1000
RX bytes:6534127 (6.2 MiB) TX bytes:640372 (625.3 KiB)
Base address:0x2000 Memory:d8920000-d8940000
eth1 Link encap:Ethernet HWaddr 00:0C:29:F2:A5:90
inet addr:192.168.111.12 Bcast:192.168.111.255 Mask:255.255.255.0
inet6 addr: fe80::20c:29ff:fef2:a590/64 Scope:Link
UP BROADCAST RUNNING MULTICAST MTU:1500 Metric:1
RX packets:42 errors:0 dropped:0 overruns:0 frame:0
TX packets:12 errors:0 dropped:0 overruns:0 carrier:0
collisions:0 txqueuelen:1000
RX bytes:11455 (11.1 KiB) TX bytes:720 (720.0 b)
Base address:0x2040 Memory:d8940000-d8960000
lo Link encap:Local Loopback
inet addr:127.0.0.1 Mask:255.0.0.0
inet6 addr: ::1/128 Scope:Host
UP LOOPBACK RUNNING MTU:16436 Metric:1
RX packets:18 errors:0 dropped:0 overruns:0 frame:0
TX packets:18 errors:0 dropped:0 overruns:0 carrier:0
collisions:0 txqueuelen:0
RX bytes:1348 (1.3 KiB) TX bytes:1348 (1.3 KiB)

Note: Both nodes' local time should be synchronized with the NTP server; please refer to NTP Server and Client Configuration for more info.
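
A quick sanity check of the time synchronization can be done with the NTP client tools: ntpq -p lists the configured peers and their offsets, and ntpdate -q queries a server without stepping the clock. ntp.example.com below is just a placeholder for your actual NTP server:

[root@node01 ~]# ntpq -p
[root@node01 ~]# ntpdate -q ntp.example.com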

Check Installed Packages on Both Nodes:

[root@node01 ~]# rpm -q cman rgmanager clvmd gfs2-utils gfs-utils lvm2-cluster
cman-2.0.115-1.el5
rgmanager-2.0.52-1.el5
package clvmd is not installed
gfs2-utils-0.1.62-1.el5
gfs-utils-0.1.20-1.el5
lvm2-cluster-2.02.46-8.el5


[root@node02 ~]# rpm -q cman rgmanager clvmd gfs2-utils gfs-utils lvm2-cluster
cman-2.0.115-1.el5
rgmanager-2.0.52-1.el5
package clvmd is not installed
gfs2-utils-0.1.62-1.el5
gfs-utils-0.1.20-1.el5
lvm2-cluster-2.02.46-8.el5
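
Note that on RHEL 5 clvmd is not a separate package; the clvmd daemon ships inside lvm2-cluster, which is why rpm reports it as not installed above. If any of the other packages are missing, they can be installed with yum, assuming the Cluster and Cluster Storage repositories/channels are available:

[root@node01 ~]# yum install -y cman rgmanager gfs2-utils gfs-utils lvm2-cluster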

Populate the cluster.conf Cluster Configuration File on Both Nodes:

[root@node01 ~]# cat /etc/cluster/cluster.conf
<?xml version="1.0"?>
<cluster alias="cluster8" config_version="29" name="cluster8">
    <quorumd device="/dev/QVG01/QLV01" interval="1" label="QDISK" min_score="3" tko="10" votes="3">
        <heuristic interval="2" program="/bin/ping 192.168.11.1 -c2 -t1" score="1"/>
    </quorumd>
    <fence_daemon clean_start="0" post_fail_delay="0" post_join_delay="3"/>
    <clusternodes>
        <clusternode name="node02.example.com" nodeid="1" votes="1">
            <fence>
                <method name="1">
                    <device name="XFence" nodename="node02.example.com"/>
                </method>
            </fence>
        </clusternode>
        <clusternode name="node01.example.com" nodeid="2" votes="1">
            <fence>
                <method name="1">
                    <device name="XFence" nodename="node01.example.com"/>
                </method>
            </fence>
        </clusternode>
    </clusternodes>
    <cman>
        <multicast addr="224.0.0.251"/>
    </cman>
    <fencedevices>
        <fencedevice agent="fence_manual" name="XFence"/>
    </fencedevices>
    <rm>
        <failoverdomains>
            <failoverdomain name="prefer_node01" nofailback="0" ordered="1" restricted="0">
                <failoverdomainnode name="node02.example.com" priority="2"/>
                <failoverdomainnode name="node01.example.com" priority="1"/>
            </failoverdomain>
        </failoverdomains>
        <resources>
            <ip address="192.168.11.111/24" monitor_link="0"/>
            <script file="/etc/init.d/nfs" name="nfsexpt"/>
        </resources>
        <service autostart="1" domain="prefer_node01" exclusive="0" name="mynfs" recovery="relocate">
            <ip ref="192.168.11.111/24">
                <script ref="nfsexpt"/>
            </ip>
        </service>
    </rm>
</cluster>
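
The quorumd section above points at a quorum disk labelled QDISK on /dev/QVG01/QLV01. If that device has not been initialized yet, the label can be written with mkqdisk from one node (this destroys any data on the device, so double-check the path); mkqdisk -L then lists the quorum disks it can see:

[root@node01 ~]# mkqdisk -c /dev/QVG01/QLV01 -l QDISK
[root@node01 ~]# mkqdisk -L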

[root@node01 ~]# scp /etc/cluster/cluster.conf node02:/etc/cluster/cluster.conf
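
Before starting the cluster it is worth confirming that the file is well-formed XML, and on a cluster that is already running the usual way to push a modified cluster.conf is to increment config_version and distribute it with ccs_tool rather than scp. A sketch:

[root@node01 ~]# xmllint --noout /etc/cluster/cluster.conf
[root@node01 ~]# ccs_tool update /etc/cluster/cluster.conf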

Configure the NFS Export File on Both Nodes:

[root@node01 ~]# cat /etc/exports
/oradata *(rw,no_root_squash,no_subtree_check,insecure,fsid=0)
[root@node01 ~]# scp /etc/exports node02:/etc/exports
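
Because rgmanager controls NFS through the /etc/init.d/nfs script resource defined above, the nfs init script should not also be started by init at boot, and the exported directory must exist on every node that can own the service. Something along these lines on both nodes (assuming /oradata is the shared mount point used for the export):

[root@node01 ~]# chkconfig nfs off
[root@node01 ~]# mkdir -p /oradata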

Start cman service on Both Nodes:

[root@node01 ~]# /etc/init.d/cman start
Starting cluster:
Loading modules... done
Mounting configfs... done
Starting ccsd... done
Starting cman... done
Starting daemons... done
Starting fencing... done
[ OK ]


[root@node02 ~]# /etc/init.d/cman start
Starting cluster:
Loading modules... done
Mounting configfs... done
Starting ccsd... done
Starting cman... done
Starting daemons... done
Starting fencing... done
[ OK ]


Note: The first time, you should run the “/etc/init.d/cman start” command on both nodes simultaneously.
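
To have cman come up automatically on the next boot, and to verify membership and quorum once it is running, chkconfig and cman_tool can be used, for example:

[root@node01 ~]# chkconfig cman on
[root@node01 ~]# cman_tool status
[root@node01 ~]# cman_tool nodes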

Check Cluster Status:

[root@node01 ~]# clustat
Cluster Status for cluster8 @ Sat Nov 5 16:16:06 2011
Member Status: Quorate
Member Name ID Status
------ ---- ---- ------
node02.example.com 1 Online
node01.example.com 2 Online, Local

Start rgmanager service on Both Nodes:
[root@node01 ~]# /etc/init.d/rgmanager start
Starting Cluster Service Manager: [ OK ]
[root@node02 ~]# /etc/init.d/rgmanager start
Starting Cluster Service Manager: [ OK ]
[root@node01 ~]# clustat
Cluster Status for cluster8 @ Sat Nov 5 16:32:14 2011
Member Status: Quorate
Member Name ID Status
------ ---- ---- ------
node02.example.com 1 Online, rgmanager
node01.example.com 2 Online, Local, rgmanager
Service Name Owner (Last) State
------- ---- ----- ------ -----
service:mynfs node01.example.com started

Check that the NFS Service is Available on node01:

[root@node01 ~]# showmount -e 192.168.11.111
Export list for 192.168.11.111:
/oradata *
[root@node01 ~]# showmount -e 192.168.11.11
Export list for 192.168.11.11:
/oradata *
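
If showmount ever fails with an RPC/portmapper error (as it will later when the service is disabled), rpcinfo is a quick way to confirm which RPC services are registered behind the cluster IP:

[root@node01 ~]# rpcinfo -p 192.168.11.111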

Move NFS Service From node01 To node02 Manually:

[root@node02 ~]# clusvcadm -r mynfs -m node02.example.com
Trying to relocate service:mynfs to node02.example.com...Success
service:mynfs is now running on node02.example.com
[root@node01 ~]# clustat
Cluster Status for cluster8 @ Sat Nov 5 16:32:14 2011
Member Status: Quorate
Member Name ID Status
------ ---- ---- ------
node02.example.com 1 Online, rgmanager
node01.example.com 2 Online, Local, rgmanager
Service Name Owner (Last) State
------- ---- ----- ------ -----
service:mynfs node02.example.com started


Check From NFS Client:

[root@ldap01 ~]# showmount -e 192.168.11.111
Export list for 192.168.11.111:
/oradata *
[root@ldap01 ~]# mount 192.168.11.111:/oradata /mnt/
[root@ldap01 ~]# df -h
Filesystem Size Used Avail Use% Mounted on
/dev/mapper/vg_ldap-rootlv01
8.3G 2.6G 5.4G 33% /
tmpfs 497M 76K 497M 1% /dev/shm
/dev/sda1 97M 27M 66M 29% /boot
/dev/sr0 7.0G 153M 6.9G 3% /mnt
192.168.11.111:/oradata
7.0G 153M 6.9G 3% /mnt
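
On the client, the cluster IP can also be mounted with explicit NFS options, or placed in /etc/fstab so the mount comes back after a reboot. The mount point /oradata on the client and the options below are only illustrative, not taken from the setup above:

[root@ldap01 ~]# mkdir -p /oradata
[root@ldap01 ~]# mount -t nfs -o vers=3,hard,intr 192.168.11.111:/oradata /oradata

A matching /etc/fstab line would look like:

192.168.11.111:/oradata /oradata nfs vers=3,hard,intr 0 0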


[root@node02 ~]# clusvcadm -r mynfs -m node01.example.com
Trying to relocate service:mynfs to node01.example.com...Success
service:mynfs is now running on node01.example.com
[root@node01 ~]# clustat
Cluster Status for cluster8 @ Sat Nov 5 16:32:14 2011
Member Status: Quorate
Member Name ID Status
------ ---- ---- ------
node02.example.com 1 Online, rgmanager
node01.example.com 2 Online, Local, rgmanager
Service Name Owner (Last) State
------- ---- ----- ------ -----
service:mynfs node01.example.com started

[root@ldap01 ~]# df -h
Filesystem Size Used Avail Use% Mounted on
/dev/mapper/vg_ldap-rootlv01
8.3G 2.6G 5.4G 33% /
tmpfs 497M 76K 497M 1% /dev/shm
/dev/sda1 97M 27M 66M 29% /boot
/dev/sr0 7.0G 153M 6.9G 3% /mnt
192.168.11.111:/oradata
7.0G 153M 6.9G 3% /mnt

Enable/Disable Cluster Services:

[root@node02 ~]# clusvcadm -d mynfs
Local machine disabling service:mynfs...Success
[root@node02 ~]# clustat
Cluster Status for cluster8 @ Sat Nov 5 17:01:43 2011
Member Status: Quorate
Member Name ID Status
------ ---- ---- ------
node02.example.com 1 Online, Local, rgmanager
node01.example.com 2 Online, rgmanager
Service Name Owner (Last) State
------- ---- ----- ------ -----
service:mynfs (node01.example.com) disabled
[root@node02 ~]# showmount -e 192.168.11.111
mount clntudp_create: RPC: Port mapper failure - RPC: Unable to receive

Note: The NFS service is not available while the cluster service is disabled; since the cluster no longer manages it, you can start or stop NFS manually if needed.

[root@node02 ~]# clusvcadm -e mynfs
Local machine trying to enable service:mynfs...Success
service:mynfs is now running on node02.example.com
[root@node02 ~]# clustat
Cluster Status for cluster8 @ Sat Nov 5 17:03:13 2011
Member Status: Quorate
Member Name ID Status
------ ---- ---- ------
node02.example.com 1 Online, Local, rgmanager
node01.example.com 2 Online, rgmanager
Service Name Owner (Last) State
------- ---- ----- ------ -----
service:mynfs node02.example.com started
[root@node02 ~]# showmount -e 192.168.11.111
Export list for 192.168.11.111:
/oradata *


-d <service>
Stops and disables the user service named service
-e <service>
Enables and starts the user service named service
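
In addition to -d and -e, clusvcadm can restart a service in place on its current owner with -R, which is handy after changing the exports file, for example:

[root@node02 ~]# clusvcadm -R mynfs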


Freeze/Unfreeze Cluster Services:

[root@node02 ~]# clusvcadm -Z mynfs
Local machine freezing service:mynfs...Success
[root@node02 ~]# clustat
Cluster Status for cluster8 @ Sat Nov 5 17:04:08 2011
Member Status: Quorate
Member Name ID Status
------ ---- ---- ------
node02.example.com 1 Online, Local, rgmanager
node01.example.com 2 Online, rgmanager
Service Name Owner (Last) State
------- ---- ----- ------ -----
service:mynfs node02.example.com started [Z]
[root@node02 ~]# showmount -e 192.168.11.111
Export list for 192.168.11.111:
/oradata *

Note: The NFS service is still available. While the cluster service is frozen, you can manually start or stop the NFS service without rgmanager reacting.

[root@node02 ~]# /etc/init.d/nfs stop
Shutting down NFS mountd: [ OK ]
Shutting down NFS daemon: [ OK ]
Shutting down NFS quotas: [ OK ]
Shutting down NFS services: [ OK ]
[root@node02 ~]# clustat
Cluster Status for cluster8 @ Sat Nov 5 17:04:28 2011
Member Status: Quorate
Member Name ID Status
------ ---- ---- ------
node02.example.com 1 Online, Local, rgmanager
node01.example.com 2 Online, rgmanager
Service Name Owner (Last) State
------- ---- ----- ------ -----
service:mynfs node02.example.com started [Z]

Note: I stopped the NFS service manually, but the cluster service status still shows started, because status checks are suspended while the service is frozen.
[root@node02 ~]# clusvcadm -U mynfs
Local machine unfreezing service:mynfs...Success
[root@node02 ~]# clustat
Cluster Status for cluster8 @ Sat Nov 5 17:05:07 2011
Member Status: Quorate
Member Name ID Status
------ ---- ---- ------
node02.example.com 1 Online, Local, rgmanager
node01.example.com 2 Online, rgmanager
Service Name Owner (Last) State
------- ---- ----- ------ -----
service:mynfs node02.example.com started

Note: After unfreezing, rgmanager detects that the cluster NFS service is in a stopped state, which is why it fails over to the other node (node01.example.com) automatically.

[root@node02 ~]# clustat
Cluster Status for cluster8 @ Sat Nov 5 17:05:33 2011
Member Status: Quorate
Member Name ID Status
------ ---- ---- ------
node02.example.com 1 Online, Local, rgmanager
node01.example.com 2 Online, rgmanager
Service Name Owner (Last) State
------- ---- ----- ------ -----
service:mynfs node01.example.com started


-Z <service>
Freezes the service named service on the cluster member on which it is currently running. This will prevent status checks of the service as well as failover in the event the node fails or rgmanager is stopped.
-U <service>
Unfreezes the user service named service on the cluster member on which it is currently running. This will re-enable status checks.