RSF-1 cluster startup script, modified to work with SMF and customized for BrickstorOS.
#!/bin/sh
# $Id: rsfrc,v 2.106 2014/04/16 10:40:12 pg Exp $
#
# Script: rsfrc
#
# Description: RSF-1 startup/shutdown script
# Use the 'restart' option if services are already up
# Use the 'kill' option to stop RSF-1 but leave
# services running. WARNING: this option may cause an
# incorrect failover if the remote end is in automatic mode!
#
# Platform: Unix
#
# Author: High-Availability Ltd / Paul Griffiths-Todd
#
# Rewritten to support SMF, with changes specific to RackTop Systems,
# by Sam Zaydel
#
# Copyright (c) 2014 RackTop Systems.
# Trace each command as it is executed.
set -x

# Load helper definitions. Names this script relies on but does not define
# itself (PRODUCT_ETC, SMF_EXIT_OK, rotatelogs, ...) presumably come from
# these two files.
. /opt/HAC/bin/rsf.sh
. /lib/svc/share/smf_include.sh

# While this file exists a plain 'start' is skipped; 'blockstart' creates
# it and 'forcestart' removes it.
export DONOTSTARTHA="/do-not-start-ha"

# Service administration command, used to restart rpc/bind on 'forcestart'.
SVCADM="/usr/sbin/svcadm"

# The resource agent is started/stopped only when this file exists.
RESOURCE_MONITOR_CONFIG="/opt/HAC/RSF-1/agents/etc/resource_agent.cfg"

# Marker files (cluster node dependent); the first one that exists selects
# the block-state option passed to rsfmon.
ALWAYS_BLOCK_STATE="${PRODUCT_ETC}/.force_blocked"
PRESERVE_BLOCK_STATE="${PRODUCT_ETC}/.preserve_blocked"
ALWAYS_UNBLOCK_STATE="${PRODUCT_ETC}/.force_unblocked"

# STMF RPC helper: program, log file and rsfpmon lock file.
RPC_CLUSTER_STMF="/opt/HAC/RSF-1/bin/rpcstmfha"
RPC_CLUSTER_STMF_LOG="/opt/HAC/RSF-1/log/rpcstmf.log"
RPC_CLUSTER_STMF_LOCK="/var/run/rsfpmon_rpcstmfha"

# Cluster services RPC helper: program, log file and rsfpmon lock file.
RPC_CLUSTER_SERVICES="/opt/HAC/RSF-1/bin/rpchasvc"
RPC_CLUSTER_SERVICES_LOG="/opt/HAC/RSF-1/log/rpchasvc.log"
RPC_CLUSTER_SERVICES_LOCK="/var/run/rsfpmon_rpchasvc"

# FMRI for Brickstor HA Cluster
CLUSTER_FMRI="svc:/racktop/system/cluster:default"

# Seconds to pause, after the warning message, before stopping.
DELAY=2
#
# Prefix helper launches with the contract runner when it is installed;
# otherwise leave the prefix empty so the helper is run directly.
if [ ! -x "/usr/bin/ctrun" ] ; then
    CTRUN=""
else
    CTRUN="/usr/bin/ctrun -l none"
fi
#
# Insert generic rsfmon command line flags here
PRODUCT_MONOPTS=""

# Append a -b option according to the first marker file that exists:
#   ALWAYS_BLOCK_STATE    -> -b b
#   PRESERVE_BLOCK_STATE  -> -b p
#   ALWAYS_UNBLOCK_STATE  -> -b u
# The paths are quoted on purpose: an unquoted empty value would reduce the
# test to '[ -f ]', which is true, and would select '-b b' with no marker
# file present at all.
if [ -f "${ALWAYS_BLOCK_STATE}" ] ; then
    PRODUCT_MONOPTS="${PRODUCT_MONOPTS} -b b"
elif [ -f "${PRESERVE_BLOCK_STATE}" ] ; then
    PRODUCT_MONOPTS="${PRODUCT_MONOPTS} -b p"
elif [ -f "${ALWAYS_UNBLOCK_STATE}" ] ; then
    PRODUCT_MONOPTS="${PRODUCT_MONOPTS} -b u"
fi
###################################################################
# No user modifiable parts from here on...
###################################################################
#
# The following entries are for Fedora/RedHat chkconfig tool.
#
# chkconfig: - 85 15
# description: RSF-1 from High-Availability.Com for managed application failover
#
# Exported to child processes. The value must NOT contain '/' characters.
export DISC_HB="rsf1_hb_"
#
# Output mesg with prog/state prefix.
# Globals:   state (read) - action word shown inside the brackets
# Arguments: zero or more messages; each one is printed on its own line
# Outputs:   "Brickstor HA Cluster [<state>]: <message>" on stdout
cluster_service_msg ()
{
    # ${state} comes from the command line, so it is passed as data and never
    # as part of the printf format: a '%' or '\' in it must print literally.
    if [ $# -eq 0 ] ; then
        # No message still yields one prefixed (empty) line.
        printf 'Brickstor HA Cluster [%s]: %s\n' "${state}" ""
        return
    fi
    while [ $# -gt 0 ] ; do
        printf 'Brickstor HA Cluster [%s]: %s\n' "${state}" "$1"
        shift
    done
}
#
# Re-register the ZFS event handler with the sysevent administration tool.
# Globals:   ZFS_EVENT_HANDLER (read) - handler program to register
#            SYS_EVENT_ADM (read)     - administration command to run
# Does nothing unless both are executable files. When they are, any existing
# EC_zfs registration of the handler is removed, the handler is added again
# and the tool's 'restart' sub-command is run after each change.
run_sysevent_watcher()
{
    # Quoted, separate tests: with unquoted empty variables the combined
    # '[ -x -a -x ]' form evaluates true and the commands below would be
    # attempted with an empty program name.
    if [ -x "${ZFS_EVENT_HANDLER}" ] && [ -x "${SYS_EVENT_ADM}" ] ; then
        cluster_service_msg "Registering ZFS sysevent watcher: ${ZFS_EVENT_HANDLER}"
        "${SYS_EVENT_ADM}" remove -v SUNW -c EC_zfs "${ZFS_EVENT_HANDLER}"
        "${SYS_EVENT_ADM}" restart
        # The $name tokens are handed over literally (single-quoted) so the
        # administration tool, not this shell, substitutes them.
        "${SYS_EVENT_ADM}" add -v SUNW -c EC_zfs "${ZFS_EVENT_HANDLER}" \
            'pool=$pool_name' 'guid=$pool_guid' 'vendor=$vendor' \
            'class=$class' 'subclass=$subclass' 'timestamp=$timestamp' \
            'publisher=$publisher' 'sequence=$sequence'
        "${SYS_EVENT_ADM}" restart
    fi
}
#
# Send a signal to the rsfmon process recorded in the PID file.
# Globals:   PRODUCT_PID (read) - path of the rsfmon PID file
# Arguments: $1 - signal name or number (given to kill as -<signal>)
#            $2 - "pgrp" signals the process group -<pid>; any other value
#                 signals only the process itself
# Returns:   1 when the PID file is missing or holds no usable PID,
#            otherwise the exit status of kill
killrsf()
{
    signal=$1
    type=$2
    if [ ! -f "${PRODUCT_PID}" ]; then
        cluster_service_msg "rsfmon not running"
        return 1
    fi

    rsf_pid=$(cat "${PRODUCT_PID}" 2> /dev/null)

    # Refuse anything that is not a positive decimal number. An empty value
    # would run kill without a target, and 0 (or -0 for "pgrp") would signal
    # the caller's own process group.
    case "${rsf_pid}" in
        ''|*[!0-9]*) rsf_pid="" ;;
    esac
    if [ -z "${rsf_pid}" ] || [ "${rsf_pid}" -le 0 ] ; then
        cluster_service_msg "rsfmon PID file '${PRODUCT_PID}' does not contain a valid PID"
        return 1
    fi

    if [ "${type}" = "pgrp" ] ; then
        kill "-${signal}" "-${rsf_pid}"
    else
        kill "-${signal}" "${rsf_pid}"
    fi
    return $?
}
#
# Start method for the Brickstor High-Availability service.
#
# Steps, in order:
#   1. exit OK if the DONOTSTARTHA file exists (unless state is forcestart);
#   2. on forcestart, restart rpc/bind and remove the DONOTSTARTHA file;
#   3. exit OK if 'rsfcli isrunning' reports success;
#   4. prepare (PID dir cleanup, sysevent watcher, temporary interfaces),
#      rotate logs and launch rsfmon;
#   5. start the optional helpers: resource agent, STMF RPC process,
#      cluster services RPC process, stmfproxy monitor, cache file sync.
#
# Globals:  state, script and the path/option variables referenced below.
# Exits:    SMF_EXIT_OK in cases 1 and 3, SMF_EXIT_ERR_FATAL if rsfmon fails.
# Returns:  0 once everything has been launched.
smf_start_service () {
    # This is the method used to start Brickstor High-Availability Service.
    if [ -f "${DONOTSTARTHA}" ] && [ "${state}" != "forcestart" ] ; then
        cluster_service_msg "file '${DONOTSTARTHA}' exists not performing normal start-up - exit"
        exit $SMF_EXIT_OK
    fi

    # '=' rather than '==': the latter is not a POSIX test operator.
    if [ "${state}" = "forcestart" ] ; then
        if [ -x "${SVCADM}" ] ; then
            cluster_service_msg "Reloading RPC/bind"
            "${SVCADM}" restart rpc/bind
        fi
        sleep 1
        rm -f "${DONOTSTARTHA}"
    fi

    if rsfcli isrunning ; then
        cluster_service_msg "Cluster Service is already running!"
        exit $SMF_EXIT_OK
    fi

    cleanup_rsf_pid_dir
    run_sysevent_watcher

    if [ -f "${PRODUCT_ETC}/.disable_disc_heartbeats" ] ; then
        rm -f "${PRODUCT_ETC}/.disable_disc_heartbeats" >/dev/null 2>&1
    fi

    # Creating Temporary network interfaces, based on devices configured
    # for each service.
    create_ipadm_temporary_interface

    cluster_service_msg "Starting Cluster Monitoring"
    rotatelogs "${PRODUCT_LOGDIR}" "${RSF1_LOG}"

    #
    # Rotate STMFHA logs. Files whose last dot-separated component is purely
    # numeric are already-rotated copies and are skipped, whatever the number
    # of digits (so ".10" is skipped just like ".1").
    for logfile in "${STMFHA_LOGDIR}/${STMFHA_LOG}"* ; do
        # An unmatched glob stays literal; ignore it.
        [ -e "${logfile}" ] || continue
        case "${logfile##*.}" in
            ''|*[!0-9]*) rotatelogs "${STMFHA_LOGDIR}" "${logfile##*/}" ;;
        esac
    done

    #
    # Rotate SYSEVENT logs.
    rotatelogs "${PRODUCT_LOGDIR}" "${ZFS_SYSEVENT_LOG}"

    #
    # Rotate RPC logs for STMF and Cluster Services.
    rotatelogs "${PRODUCT_LOGDIR}" "${RPC_STMFHA_LOG}"
    rotatelogs "${PRODUCT_LOGDIR}" "${RPC_CLUSTERSVC_LOG}"

    #
    # Rotate FEN logs.
    rotatelogs "${PRODUCT_LOGDIR}" "${FEN_LOG}"

    # Need to do more testing to make sure contract handling is correct,
    # because in some of the earlier testing it appeared as though contract
    # was being orphaned almost immediately, which is not what should happen
    # as far as I know. For now, we won't use it here.
    #
    # PRODUCT_MONOPTS is deliberately unquoted: it holds several words.
    "${PRODUCT_BIN}/rsfmon" ${PRODUCT_MONOPTS} -i \
        > "${COMPANY_DIR}/RSF-1/log/${RSF1_LOG}" 2>&1 \
        || exit ${SMF_EXIT_ERR_FATAL}

    # Start resource agent if configured.
    if [ -f "${RESOURCE_MONITOR_CONFIG}" ] && [ -x "${PRODUCT_BIN}/rsfagent" ] ; then
        cluster_service_msg "Starting Cluster resource agent"
        "${PRODUCT_BIN}/rsfagent" --resource-agent start
    fi

    #
    # Start the RSF-1 RPC process.
    # CTRUN is deliberately unquoted below: it is empty or "program options".
    if [ -f "${RPC_CLUSTER_STMF}" ] && [ "${PROP_COMSTAR_SUPPORT}" = "${BOOL_STR_TRUE}" ] ; then
        RPCPID=$(ps -ef | grep "${RPC_CLUSTER_STMF}" | grep -v grep | awk '{print $2}')
        if [ -z "${RPCPID}" ] ; then
            cluster_service_msg "Starting Cluster STMF RPC process."
            echo "${script} $(date): starting stmf rpc process" >> "${RPC_CLUSTER_STMF_LOG}"
            # Without the rsfpmon process monitor the helper is run directly.
            if [ ! -f "${RSFPMON}" ] ; then
                ${CTRUN} "${RPC_CLUSTER_STMF}"
            else
                ${CTRUN} "${RSFPMON}" -v -w 1 -l "${RPC_CLUSTER_STMF_LOCK}" "${RPC_CLUSTER_STMF}" >> "${RPC_CLUSTER_STMF_LOG}" 2>&1 &
            fi
        else
            echo "Not starting RSF-1 RPC process, pid indicates it is already running: ${RPCPID}"
            echo "${script} $(date): not starting stmf rpc process, pid indicates it is already running: ${RPCPID}" >> "${RPC_CLUSTER_STMF_LOG}"
            ps -ef | grep "${RPCPID}" >> "${RPC_CLUSTER_STMF_LOG}"
        fi
    fi

    if [ -x "${RPC_CLUSTER_SERVICES}" ] ; then
        RPCPID=$(getpid "${RPC_CLUSTER_SERVICES}")
        if [ -z "${RPCPID}" ] ; then
            cluster_service_msg "Starting Cluster RPC services."
            echo "${script} $(date): starting cluster rpc process" >> "${RPC_CLUSTER_SERVICES_LOG}"
            if [ ! -f "${RSFPMON}" ] ; then
                ${CTRUN} "${RPC_CLUSTER_SERVICES}"
            else
                ${CTRUN} "${RSFPMON}" -v -w 1 -l "${RPC_CLUSTER_SERVICES_LOCK}" "${RPC_CLUSTER_SERVICES}" >> "${RPC_CLUSTER_SERVICES_LOG}" 2>&1 &
            fi
        else
            cluster_service_msg "Not starting Cluster RPC process, pid indicates it is already running: ${RPCPID}"
            # This entry goes to the cluster services log, so it names the
            # cluster rpc process (it used to say "stmf").
            echo "${script} $(date): not starting cluster rpc process, pid indicates it is already running: ${RPCPID}" >> "${RPC_CLUSTER_SERVICES_LOG}"
            if is_freebsd; then
                ps -x | grep "${RPCPID}" >> "${RPC_CLUSTER_SERVICES_LOG}"
            else
                ps -ef | grep "${RPCPID}" >> "${RPC_CLUSTER_SERVICES_LOG}"
            fi
        fi
    fi

    if [ -f "${STMFPROXY_CONFIG}" ] && [ -x "${PRODUCT_BIN}/stmfproxy.sh" ] ; then
        cluster_service_msg "Starting stmfproxy monitor"
        "${PRODUCT_BIN}/stmfproxy.sh" start
    fi

    if [ "${PROP_ZPOOL_SYNC_CACHE}" = "${BOOL_STR_TRUE}" ]; then
        cluster_service_msg "Starting cache file sync process..."
        "${PRODUCT_BIN}/rsf-zfs-event" subclass=RSF_START >/dev/null 2>&1 &
    fi

    cluster_service_msg "Cluster Service Started"
    return 0
}
#
# Stop method for the Brickstor High-Availability service.
#
# Steps, in order:
#   1. stop the resource agent when it is configured;
#   2. wait DELAY seconds, then send TERM to rsfmon via killrsf;
#   3. remove the temporary network interfaces (also when step 2 failed);
#   4. wait for the rsfmon PID file to disappear, polling every 5 seconds,
#      and give up (KILL, remove PID file) after 120 polls;
#   5. stop the stmfproxy monitor, the legacy rpcmapmgr process, and the
#      STMF / cluster services RPC processes with their rsfpmon monitors.
#
# Exits:    SMF_EXIT_ERR_FATAL if rsfmon cannot be signalled or does not
#           exit within the polling limit.
# Returns:  0 when the stop sequence completed.
smf_stop_service () {
    # This is the method used to stop Brickstor High-Availability Service.
    # Stop resource agent if configured.
    if [ -f "${RESOURCE_MONITOR_CONFIG}" ] && [ -x "${PRODUCT_BIN}/rsfagent" ] ; then
        cluster_service_msg "Stopping Cluster resource agent"
        "${PRODUCT_BIN}/rsfagent" --resource-agent stop
    fi

    cluster_service_msg "Stopping Cluster monitoring and services in ${DELAY} seconds..."
    sleep "${DELAY}"

    if ! killrsf TERM noprg ; then
        cluster_service_msg "Couldn't stop Cluster (not running?)"
        if [ -f "${PRODUCT_PID}" ]; then
            rm -f "${PRODUCT_PID}"
        fi
        # At this point we remove interfaces even if stop failed
        remove_ipadm_temporary_interface
        exit $SMF_EXIT_ERR_FATAL
    fi

    remove_ipadm_temporary_interface # At this point we remove interfaces

    i=0
    while [ -f "${PRODUCT_PID}" ] # PID file removed when rsfmon exits
    do
        sleep 5
        i=$((i + 1))
        if [ "${i}" -ge 120 ]; then
            cluster_service_msg "Service(s) not dying, aborting"
            killrsf KILL noprg
            rm -f "${PRODUCT_PID}"
            exit $SMF_EXIT_ERR_FATAL
        fi
    done

    if [ -f "${STMFPROXY_CONFIG}" ] && [ -x "${PRODUCT_BIN}/stmfproxy.sh" ] ; then
        cluster_service_msg "Stopping stmfproxy monitor"
        "${PRODUCT_BIN}/stmfproxy.sh" stop
    fi

    #
    # For old times' sake: stop the helper from older versions if present.
    # RPCPID may hold several PIDs, so it is left unquoted for kill.
    RPCPID=$(ps -ef | grep rpcmapmgr | grep -v grep | awk '{print $2}') # old version
    if [ -n "${RPCPID}" ] ; then
        kill ${RPCPID}
    fi

    #
    # Stop the STMF RPC process and process runner.
    # The lock file is only trusted when it holds a plain decimal PID; an
    # empty or damaged file must not lead to 'kill -9' without a target.
    if [ -f "${RPC_CLUSTER_STMF_LOCK}" ] ; then
        PID=$(cat "${RPC_CLUSTER_STMF_LOCK}")
        case "${PID}" in
            ''|*[!0-9]*)
                cluster_service_msg "${script} $(date): no valid pid in ${RPC_CLUSTER_STMF_LOCK}, rsfpmon stmfha monitor not signalled"
                ;;
            *)
                kill -9 "${PID}"
                cluster_service_msg "${script} $(date): stopped rsfpmon stmfha monitor pid ${PID}"
                ;;
        esac
    fi
    RPCPID=$(ps -ef | grep "${RPC_CLUSTER_STMF}" | grep -v grep | awk '{print $2}')
    if [ -n "${RPCPID}" ] ; then
        kill ${RPCPID}
        cluster_service_msg "${script} $(date): stopped ${RPC_CLUSTER_STMF} pid ${RPCPID}"
    fi

    #
    # Stop the cluster services RPC process and process runner.
    if [ -f "${RPC_CLUSTER_SERVICES_LOCK}" ] ; then
        PID=$(cat "${RPC_CLUSTER_SERVICES_LOCK}")
        case "${PID}" in
            ''|*[!0-9]*)
                cluster_service_msg "${script} $(date): no valid pid in ${RPC_CLUSTER_SERVICES_LOCK}, rsfpmon cluster services monitor not signalled"
                ;;
            *)
                kill -9 "${PID}"
                cluster_service_msg "${script} $(date): stopped rsfpmon cluster services monitor pid ${PID}"
                ;;
        esac
    fi
    RPCPID=$(getpid "${RPC_CLUSTER_SERVICES}")
    if [ -n "${RPCPID}" ] ; then
        kill ${RPCPID}
        cluster_service_msg "${script} $(date): stopped ${RPC_CLUSTER_SERVICES} pid ${RPCPID}"
    fi

    cluster_service_msg "Cluster Service Stopped"
    return 0
}
#
# Entry point: the first command line argument selects the action.
# 'state' is also the word cluster_service_msg prints inside its brackets.
script="$(basename $0)"
# Not referenced in this file; presumably read by echorev - TODO confirm.
rev='$Revision: 2.106 $'
state=$1

case "${state}" in
start_msg)
    echo "Starting ${COMPANY} RSF-1"
    ;;

stop_msg)
    echo "Stopping ${COMPANY} RSF-1"
    ;;

blockstart)
    # Create the marker that makes a later plain 'start' a no-op.
    touch "${DONOTSTARTHA}"
    ;;

forcestart | start)
    smf_start_service
    ;;

restart)
    # NOTE(review): a running cluster is left untouched here, and when it is
    # not running smf_stop_service exits as soon as rsfmon cannot be
    # signalled, so smf_start_service looks unreachable - confirm intent.
    if rsfcli isrunning ; then
        cluster_service_msg "Cluster Service is already running!"
        exit $SMF_EXIT_OK
    fi
    cluster_service_msg "Restarting Cluster Service"
    smf_stop_service
    smf_start_service
    ;;

kill)
    # Stops rsfmon only (KILL to its process group) after showing a warning
    # banner and asking for confirmation.
    copyright
    cat <<"EOF"
_ _ _
| | / /___ __________ (_)___ ____ _
| | /| / / __ `/ ___/ __ \/ / __ \/ __ `/
| |/ |/ / /_/ / / / / / / / / / / /_/ /
|__/|__/\__,_/_/ /_/ /_/_/_/ /_/\__, /
/____/
Killing rsfmon can cause a split-brain scenario to occur,
resulting in application data corruption. Only kill rsfmon if
you are aware of the risks and are certain you want to proceed
EOF
    echo "Do you really want to kill rsfmon (yes/no) ?"
    # Abort when the xor helper (defined outside this file) reports success.
    xor yes no yes && exit 1

    cluster_service_msg "Stopping RSF-1 monitoring in ${DELAY} seconds..."
    sleep ${DELAY}
    if ! killrsf KILL pgrp ; then
        [ -f "${PRODUCT_PID}" ] && rm -f ${PRODUCT_PID}
        cluster_service_msg "Couldn't kill RSF-1 (not running?)"
        exit 1
    fi
    rm -f ${PRODUCT_PID}
    cluster_service_msg "RSF-1 stopped"
    ;;

stop)
    smf_stop_service
    ;;

-v)
    echorev
    ;;

*)
    echo "Usage: ${script} <start|forcestart|blockstart|restart|kill|stop>"
    ;;
esac

exit $SMF_EXIT_OK