#!/bin/bash
#
# description: Start and stop SBX services.
#############################################################
#
# Copyright (c) 2009 Sonus Networks, Inc.
#
# All Rights Reserved.
# Confidential and Proprietary.
#
# sbxStart.sh
#
# Mark St. Pierre
# 6/1/09
#
# Module Description:
# Script to manage Linux SBX service. This script is 
# installed as: $ETC_INITD_SBX
#
#############################################################

# The 'service sbx ...' command is frequently run from inside the openclovis
# log directory, which is removed and re-created on every start up.  Running
# from a directory that no longer exists produces the nasty 'can't get parent
# dir' errors from shell-init, so move to a directory known to exist.  A
# couple of shell-init errors can still show up, but that is about it.
cd /opt/sonus 2> /dev/null

# Load the LSB init function library when it is installed and remember which
# flavour of init helpers the rest of the script can rely on.
lsbtype="nolsb"
if [[ -f /lib/lsb/init-functions ]]; then
   lsbtype="base"
   . /lib/lsb/init-functions
fi

# Load the common setup (this also brings in sonusCommands and
# sonusCommonFiles).  The staging directory is checked first because, while
# upgrading from 4.x versions, the sbxStart.sh of the newer software is called
# in the middle of the upgrade.
if [[ -e /opt/sonus/staging/sonusCommonUtils.sh ]]; then
    . /opt/sonus/staging/sonusCommonUtils.sh
elif [[ -e /opt/sonus/bin/sonusCommonUtils.sh ]]; then
    . /opt/sonus/bin/sonusCommonUtils.sh
else
    /bin/echo "Could not locate sonusCommonUtils.sh Exiting..."
    exit 1
fi

# Script-wide settings.  Most values are copied from variables that are
# expected to be provided by the sourced sonusCommonUtils.sh (their
# definitions are not part of this file).
subSystem=sbx
# RETVAL carries the script's result; EINTR is the value stored in RETVAL by
# the HUP/INT/QUIT/TERM trap.
RETVAL=0
EINTR=4
HOME=/root
sonusRoot=$SONUS_DIR
sbxConfDir=$SONUS_DIR/conf
sbxScriptsDir=$sonusRoot/sbx/scripts
sonusTmpDir=$SONUS_TMP_DIR
installRoot=$SONUS_SBX_DIR
stagingDir=$SONUS_STAGING_DIR
sonusBin=$SONUS_SBX_BIN_DIR
installUpgradeDir=$SONUS_INSTALLUPGRADE_DIR
# SAF platform (safplus) control script, daemon and watchdog
progAsp="safplus"
ASP=$SONUS_OPENCLOVIS_DIR/etc/init.d/safplus
ASPAMF=$SONUS_OPENCLOVIS_BIN_DIR/safplus_amf
ASPWATCHDOG_NAME=safplus_watchdog
ASPWATCHDOG=$SONUS_OPENCLOVIS_DIR/etc/$ASPWATCHDOG_NAME.py
# NOTE(review): these option strings are later expanded unquoted on a command
# line.  The shell recognises redirection operators before it performs
# variable expansion, so the ">> ... 2>&1" part reaches the command as plain
# arguments instead of redirecting its output - confirm this is intended.
ASP_START_OPTIONS="start --enforce-tipc-settings >> /var/log/sonus/tmp/asp.out 2>&1"
ASP_STOP_OPTIONS="stop >> /var/log/sonus/tmp/asp.out 2>&1"
ASP_ZAP_OPTIONS="zap >> /var/log/sonus/tmp/asp.out 2>&1"
NP_UTILITIES=$SONUS_SBX_UTILITIES_NP_DIR
NP_LOG=$SONUS_LOG_SBX_DIR/np.log
# Marker files describing software upgrade (LSWU) and NP load state
softwareUpgradeCommitPendingFile=$LSWU_COMMIT_PENDING
npLoadIssueRebootMarkerFile=$NP_LOAD_ISSUE_AND_REBOOT
softwareUpgradeInProgressFile=$LSWU_IN_PROGRESS
dynamicHARequiredFile=$PERFORM_MODEL_UPDATE
revertScript=$SBX_REVERT_PL
revertLogFile=$stagingDir/revert.out
startupLog=$STARTUP_LOG
# note: use /tmp for the marker file so a reboot cleans it up
# NOTE(review): the marker is actually created under $sonusTmpDir; whether
# that directory is cleared on reboot cannot be told from here - confirm.
stateChangeMarkerFile=$sonusTmpDir/.serviceStartRestartStop
stateChangeCollision=0
stateChangeRequest=0
statusBfdMarkerFile=$sonusTmpDir/.bfdpid
# Per-process status request marker: /tmp/.serviceStatusReq.<pid of this shell>
statusReqMarkerDir=/tmp
statusReqMarkerBase=.serviceStatusReq
statusReqMarkerFile=$statusReqMarkerDir/$statusReqMarkerBase.$$
statusRequest=0
evLogDir=evlog
packageContents=$PACKAGE_CONTENTS
# Request type identifiers
START_STOP_REQ=1
STATUS_REQ=2
confdDotConf=$CONFD_CONF
upgradeMarker=$UPGRADE_MARKER
# Parser array for validating installUpgradeMarker file
installUpgradeCmdArray=("$stagingDir/sbxUpdate.sh" "$stagingDir/sbxUpgrade.pl")

# note: the following file name is known by SmaLoad.cpp. Do not change it!
DRBD_ZAP_LOG=$SONUS_LOG_OPENCLOVIS_DIR/drbd.log
# marker file to assist in handling of standalone/ha pair configuration issues
# note: this name is known to chm. don't change it!
singleHAMarkerFile=$SINGLE_HA_MISMATCH
# Usage of PROG is to print only the filename along with log messages
# (the invocation path $0 with everything up to the last '/' removed)
PROG=${0##*/}

# SBX should be always started in root cgroup, if it is enabled: when the
# 'batch' cpuset exists, move this shell into the root group of each of the
# cpuset, cpu and memory controllers.
if [[ -d /sys/fs/cgroup/cpuset/batch ]]; then
  for cgroupController in cpuset cpu memory; do
    $ECHO $$ > /sys/fs/cgroup/$cgroupController/cgroup.procs
  done
  unset cgroupController
fi

# Key file that is present while an SBX install/upgrade is in progress.
# It is created under /tmp so that it gets cleared on system reboot.
sbxInstallUpgradeInProgressKey="${SBX_INSTALL_UPGRADE_IN_PROGRESS}"

# InstallUpgrade marker file
installUpgradeMarker="${INSTALL_UPGRADE_MARKER}"

# The following names are used by the customer leader election library;
# do not change them!
splitBrainMarker="${SBX_SPLIT_BRAIN_MARKER}"
peerSplitBrainMarker="${SBX_PEER_SPLIT_BRAIN_MARKER}"

# This marker is generated when the vcpu count is lower than the vcpu count
# required by a profile.
sweActiveProfileVcpuMarkerFile="/tmp/.sbxSweActiveProfileVcpuMarkerFile"

# Tells whether this start follows a reboot
startingAfterReboot="false"

# Exit if not truly installed.  startup scripts (considered config files)
# are only removed on a dpkg -purge... (getting ready for deb package)
# The path must be wrapped in real shell quotes: backslash-escaped quotes
# become part of the file name being tested, which makes the test look for a
# path containing literal '"' characters and therefore never match.
[ -x "$ASP" ] || exit 0


# Every request except "status" is refused while the LCA marker file exists.
# The marker path is quoted so that an empty or unset LCA_MARKER_FILE is
# treated as "no marker" (an unquoted empty operand turns the test into a
# one-argument test, which is always true).
if [[ "$1" != "status" ]]; then
    # Check if Lca is currently running. If so don't permit user to start/stop sbx manually
    if [[ -f "$LCA_MARKER_FILE" ]]; then
        $ECHO "Cannot $1 SBX service. LCA has not completed or LCA exited with error. Please check LCA logs !!!"
        exit 1
    fi
fi

# Version column of the installed ("ii") sbc package as listed by dpkg
productVersion=$($DPKG -l sbc | $GREP '^ii' | $AWK '{ print $3 }')

# Check whether LCA service is enabled. LCA service would be enabled in SWe/Cloud cases.
# Also, LCA service won't be enabled in SWe 1:1 case deployed using ISO.
# '0' value indicates that LCA service is enabled and '1' value indicates that LCA service is disabled.
isLcaEnabled=$($SYSTEMCTL is-enabled lca.service &> /dev/null; $ECHO $?)

# Check for SBX install/upgrade in progress key file
# If present, it indicates that SBX install/upgrade is in progress, so exit with proper message.
# (quoted for the same reason as the LCA marker test above)
if [[ -f "$sbxInstallUpgradeInProgressKey" ]]; then
  $ECHO "Service not running as SBX install/upgrade is in progress (check appropriate install/upgrade logs)"
  exit 0
fi

# Source sonusUtils.sh script (if present) to get the common functions
# like getCurrentRole().  The staging copy wins over the installed one.
# The paths are quoted so that an empty/unset variable simply fails the test
# instead of making it true and sourcing "nothing".
if [[ -f "$STAGING_SONUS_UTILS_SH" ]]; then
  . "$STAGING_SONUS_UTILS_SH"
elif [[ -f "$SONUS_UTILS_SH" ]]; then
  . "$SONUS_UTILS_SH"
fi

hostSubType=$(getHwSubType)
haMode=$(getHaMode)

#
# Make sure we can find the shared libraries
#
. "$BASHRC"

#
# Make sure if we exit prematurely we cleanup our exclusivity
# marker file.  We also trap on exit so we properly cleanup,
# with exit causing a return value of whatever RETVAL is set to.
# Note: the signal trap is double-quoted, so $EINTR is expanded now, when
# the trap is installed.
#
trap "cleanExit" EXIT
trap "RETVAL=$EINTR; cleanExit" HUP INT QUIT TERM

# determine var lock directory (this matches code in sbxCleanup.sh)
varLockDir="/var/lock"

#
# Functions...
#
# The helpers below are only defined when the LSB init-functions library was
# loaded (lsbtype "base").
if [[ $lsbtype == "base" ]]; then
# Print the PIDs of the processes matching program $1, as found by pidof.
# This shell, its parent and pidof's %PPID are omitted.  When the name as
# given yields nothing, the lookup is retried with the base name only.
__pids_pidof() {
    local -a skipPids=(-o $$ -o $PPID -o %PPID)

    $PIDOF "${skipPids[@]}" -x "$1" ||
        $PIDOF "${skipPids[@]}" -x "${1##*/}"
}

# Report whether a program is running.
# Usage:   status [-p pidfile] {program}
# Returns: 0 when running (directly or under valgrind) or when the program is
#          listed as a new dynamic-HA component, 1 on a usage error,
#          3 when stopped.
status() {
    local progName runningPids pidFile=

    # Argument sanity check
    if (( $# == 0 )); then
        $ECHO "Usage: status [-p pidfile] {program}"
        return 1
    fi
    if [[ "$1" == "-p" ]]; then
        pidFile=$2
        shift 2
    fi
    progName=${1##*/}

    # Ask pidof first
    runningPids="$(__pids_pidof "$1")"
    if [[ -n "$runningPids" ]]; then
        $ECHO "${progName} (pid $runningPids) is running..."
        return 0
    fi

    # Nothing found: the program may be running under valgrind, in which
    # case its true state can still be reported.
    vgPid=$($PS -ef | $GREP valgrind | $GREP -v grep | $GREP ${progName} | $AWK '{print $2}')
    if [[ -n "$vgPid" ]]; then
        $ECHO "${progName} (valgrind pid $vgPid) is running..."
        return 0
    fi

    # A component added during LSWU is not started until both sides of the
    # model are updated or the node runs as the active node (there cannot be
    # a standby component while the active one does not exist).  After the
    # first node is updated, the new component shows 'stopped' on the standby
    # node until that node switches to active so the second node can be
    # updated.  Do not display such a component as stopped before the LSWU
    # is complete, but still indicate all is well.
    if [[ -e "$DYNAMIC_HA_NEW_COMPS" ]] && $GREP -q $progName $DYNAMIC_HA_NEW_COMPS; then
        return 0
    fi

    $ECHO "${progName} is stopped"
    return 3
}
fi

# Send a message to syslog (tagged with the script name) and print it on
# stdout.
# Arguments: $1 - the message text
# Returns:   the status of the echo command
# The message is quoted so that it is passed on verbatim: glob characters
# are not expanded against the current directory and runs of whitespace are
# preserved.
logEcho () {
    $LOGGER -t "$PROG" "$1"
    $ECHO "$1"
}

# Basic hardware sanity checks: total memory and number of Cavium NPs seen
# on the PCI bus.
# Globals read: prodString, hwType
# Globals set:  minMemory, totalMem, numCaviums
# Returns:      0 when all checks pass, 1 when memory is below the minimum,
#               2 when fewer Cavium NPs than expected are found
validateHW() {
   local requiredNps=2 npSuffix=""

   # 5100/5110 products have a lower memory floor than the 12G default
   case "$prodString" in
      5100|5110) minMemory="6111000" ;;
      *)         minMemory="12000000" ;;
   esac

   # Validate total memory (MemTotal as reported by /proc/meminfo)
   totalMem=$($CAT /proc/meminfo | $GREP MemTotal | $AWK '{print $2}')
   if [ $totalMem -lt $minMemory ]; then
      logEcho "Total Memory = $totalMem, minimum expected = $minMemory"
      return 1
   fi

   # Validate Cavium NPs are seen on PCI bus.  Yellowfin (SBC5400) has only
   # 1 NP, every other platform is expected to show at least 2.
   numCaviums=$($LSPCI | $GREP Cavium | $WC -l)
   if [ $hwType == "SBC5400" ]; then
      requiredNps=1
      npSuffix=" for 5400"
   fi
   if [ $numCaviums -lt $requiredNps ]; then
      logEcho "Total Caviums = $numCaviums, minimum expected = ${requiredNps}${npSuffix}"
      return 2
   fi

   # Tests passed; return success...
   return 0
}

#
# Supports the SWe Config Scaling feature: when the configured profile is
# "large", "largeuseracl" or "largemedia", verify that RAM >= 18GiB.
# Globals read: SWE_CFG_PROFILE_TXT, DOCKER_ENV
# Globals set:  sweCfgIndex, sweLargeCfgProfile, sweLargeUserAclCfgProfile,
#               sweLargeMediaProfile, ret, sweTotalMem, reqMemLargeCfgProfile
# Returns:      0 when no check is needed or memory is sufficient,
#               1 when memory is insufficient for the selected profile
#
validateSWeMemForCfgProfileType(){
    # No profile file (or no path at all): nothing to validate.  The path is
    # quoted so that an empty/unset variable does not make the test true and
    # leave grep reading from stdin.
    [[ -e "$SWE_CFG_PROFILE_TXT" ]] || return 0

    sweCfgIndex=$($GREP "configProfile=" "$SWE_CFG_PROFILE_TXT" | $AWK -F= '{print $2}')

    # Profile indexes as maintained in the app ("large" is 1)
    sweLargeCfgProfile=1
    sweLargeUserAclCfgProfile=3
    sweLargeMediaProfile=4

    # Only the large profiles carry a memory requirement
    case "$sweCfgIndex" in
        "$sweLargeCfgProfile"|"$sweLargeUserAclCfgProfile"|"$sweLargeMediaProfile") ;;
        *) return 0 ;;
    esac

    # Total memory comes from the pod info file when isCnf reports
    # $DOCKER_ENV, otherwise from MemTotal in /proc/meminfo.
    ret=$(isCnf)
    if [[ "$ret" == "$DOCKER_ENV" ]]; then
        sweTotalMem=$($CAT /etc/podinfo/memory)
    else
        sweTotalMem=$($GREP "MemTotal" /proc/meminfo | $AWK '{print $2}')
    fi

    # for applying large profile min RAM should be 18GiB.
    reqMemLargeCfgProfile="18308136"

    if [ "$sweTotalMem" -lt "$reqMemLargeCfgProfile" ]; then
        logEcho "Total Memory = $sweTotalMem KB , minimum Memory required to apply large or largeuseracl or largemedia profile = 18GiB"
        $LOGGER -t "$PROG" "Insufficient Memory detected for applying large or largeuseracl or largemedia profile, Please configure RAM>=18GiB for large profile, application not started...."
        return 1
    fi

    return 0
}

# Run the host check for the platform and record the outcome in $startupLog.
# Globals read: hostSubType, HOSTCHECK_MARKER_FILE, startupLog
# Globals set:  mode, ret
# Returns:      0 when the host check passes, 1 when it fails
validateHW5000(){
    local stamp checkRc isVirtualDeployment="false"
    stamp=$($DATE +%Y_%m_%d_%H_%M_%S)

    case "$hostSubType" in
        virtualCloud|virtualContainer) isVirtualDeployment="true" ;;
    esac

    # Start from a clean slate: drop the marker of a previous failed check
    if [ "$isVirtualDeployment" = "true" ]; then
        $RM -f $HOSTCHECK_MARKER_FILE > /dev/null 2>&1
    fi

    mode=$(getApplicationType)
    ret=$(isCnf)
    # A non-zero isCnf answer selects the CNF flavour of the host check
    if [ $ret != 0 ]; then
        $PYTHON3 $SONUS_SBX_SCRIPT_DIR/HostCheck_cnf.py -p SBC >> $startupLog 2>&1
        checkRc=$?
    else
        $HOST_CHECK -p $mode >> $startupLog 2>&1
        checkRc=$?
    fi

    if [ $checkRc -eq 0 ]; then
        $ECHO "$stamp: Host Check Passed" >> $startupLog
        return 0
    fi

    $ECHO "$stamp: Host Check Failed." >> $startupLog
    # On cloud/container deployments also leave a marker and raise a
    # startup error.
    if [ "$isVirtualDeployment" = "true" ]; then
        $ECHO "Failed" >> $HOSTCHECK_MARKER_FILE
        $SBX_STARTUP_ERROR_LOG_PY "LCA: Host check failed. Application cannot be started . Please check $startupLog! "
    fi
    return 1
}

#
# start - bring up the SBX service.
#
# Performs the pre-start housekeeping (process stubbing, staging cleanup,
# marker file handling, hardware/host validation, resource limits, kernel
# runtime parameters, encryption keys, tmp directories), launches the SAF
# platform through $ASP and finally moves bfdd to the 'sig' cset.
#
# Several failure paths leave through a bare 'exit' instead of returning.
# Returns: $RETVAL (set to 1 when openhpi cannot be started or when launching
#          $ASP fails).
#
start() {
    ulimit -c unlimited
    $LOGGER -t $PROG "Starting service $productVersion"

    # Add a marker so we can check if sbxCleanup was successful
    $TOUCH $SBX_CLEANUP_MARKER

    # In case of container, Disable auto-restart because k8 probes
    # will take care of restarting the container when app goes down.
    if [[ "$hostSubType" == "virtualContainer" ]]; then
        $MODIFYRESTART_PL -d
    fi 

    #TBD: Temporary Fix This change need to be reverted once the issue with EmaProcess crashing is resolved
    if [[ $HOST_NETWORKING == "true" ]] && [[ $POD_TYPE == "SLB" ]]; then
        $PERL $STUBPROC_PL ema
    fi

    # NOTE(review): the message below mentions SLB only, but the condition
    # also matches the "isbc" personality.
    if [[ -e $PERSONALITYTYPE_TXT && (`$CAT $PERSONALITYTYPE_TXT` == "slb" || `$CAT $PERSONALITYTYPE_TXT` == "isbc") ]]; then
        logEcho "Stubbing CCS process on SLB.."
        $PERL $STUBPROC_PL ccs
    fi
    
    # isCnf printing 0 is treated as "not a CNF deployment"
    cnf=$(isCnf)
    if [ $cnf == 0 ]; then
	logEcho "Stubbing RacClientProcess in non cnf.."
	$PERL $STUBPROC_PL rac
    fi

    # Refuse to start while the insufficient-VCPU marker exists
    if [ -e $sweActiveProfileVcpuMarkerFile ]; then
        logEcho "Service not started as VCPU count is not sufficient for the current Active Profile, please increase the VCPU numbers and try again"
        $LOGGER -t $PROG "Service not started as VCPU count is not sufficient for the current Active Profile, please increase the VCPU numbers and try again."
        exit
    fi

    # see if we need to cleanup due to a standalone/HA pair configuration issue
    if [ -e $singleHAMarkerFile ]; then
       $RM -fr $SONUS_OPENCLOVIS_VAR_RUN_DIR $SONUS_OPENCLOVIS_VAR_LIB_DIR
       $RM -f $singleHAMarkerFile
    fi

    # Cleanup staging from previous install/upgrade
    # (the file names to delete are read from $packageContents and are
    # resolved relative to $stagingDir)
    if [ -e $packageContents ]; then
       currDir=$PWD
       cd $stagingDir
       $RM -f `$CAT $packageContents`
       # Adding more files to be removed in case multiple installs/upgrades occur before starting SBX
       $RM -f sbc*.tar.gz *.md5 *.sha256 appInstall*.sh connexip*.deb ema*.deb sbc*.deb sbc*.signature
       $RM -f $packageContents hostType osType *.backup
       cd $currDir
    fi

    # Perform any cleanup (if any) required post-reboot:
    # a pre-upgrade check that was still in progress is marked as failed.
    if [ $startingAfterReboot == "true" ];then
       preUpgradeCheckStatusFile=/var/log/sonus/upgrade/preChecks/preUpgradeCheckStatus
       if [ -e $preUpgradeCheckStatusFile ]; then
          preCheckStatus=`$CAT $preUpgradeCheckStatusFile | $GREP ^CheckStatus | $AWK -F "=" '{print $2}'`
          if [[ $preCheckStatus == "checkInProgress" ]]; then
             $LOGGER -t $PROG "Resetting previous preUpgradeCheck status..."
             $SED -i 's/^CheckStatus=.*/CheckStatus=checkFailed/' $preUpgradeCheckStatusFile
          fi
       fi
    fi

    # remove any leader election info previously left behind
    # note: it is not removed for failure/software based restarts, hence
    # it is here instead of in sbxCleanup.sh.
    # note: also remove the 'error' file the findServiceUpTime script
    # creates when getting the peer file
    $RM -f $splitBrainMarker ${peerSplitBrainMarker}*

    # Perform simple HW validation tests...
    # NOTE(review): on a validation failure the function returns $RETVAL
    # without setting it first, so the caller may see 0 - confirm intended.
    if [[ $hwType == "ConnexIP" || $hwType == "ConnexIP5200" || $hwType == "ConnexIP5100" || $hwType == "SBC7000" || $hwType == "SBC5400" ]]; then
        validateHW
        if [ $? -ne 0 ]; then
            logEcho "HW validation failed, application can not be started; exiting..."
            return $RETVAL
        fi
    else 
        # check hwType in the supported list
        # Run the HostCheck to check for the minimum Hardware Requirement.
        # NOTE(review): existence is tested on $hostTypeFile while the content
        # is read from $HOST_TYPE - presumably the same file; verify.
        if [ -e $hostTypeFile ]; then 
            hwType=`$CAT $HOST_TYPE`
            validateHW5000 
            if [ $? -ne 0 ]; then
                $ECHO -e "HostCheck validation failed, application can not be started; exiting..."
                # close file descriptor 33 (opened outside this function)
                exec 33<&-
                return $RETVAL
            fi
        fi
    fi
    # CPU count: from $SWE_CPU_INFO_K8S on CNF, otherwise from nproc
    cnf=$(isCnf)
    if [ $cnf -ne 0 ]; then
        cpu_count=`$CAT $SWE_CPU_INFO_K8S`
    else 
        cpu_count=`$NPROC --all` 
    fi

    # Move this shell into the 'sig' cset (ConnexIP5000 with >= 2 CPUs only)
    if [[ "$hostSubType" != "virtualContainer" ]];then
        if [ $hwType == "ConnexIP5000" ] && [ $cpu_count -ge 2 ];then
            $CSET proc -m -p $$ -t sig >> $startupLog 2>&1
        fi
    else
        $LOGGER -t $PROG "Not performing cset operations in container environment."
    fi

    if [[ $hwType == "ConnexIP5000" ]] && [[ "$hostSubType" != "virtualContainer" ]]; then
        # a status of -1 in the VM config change file blocks the start
        sweVmConfigFlag=`$GREP "status" $VMCONFIG_CHANGE_TXT | $CUT -d "=" -f 2`
        if [[ "$sweVmConfigFlag" == "-1" ]]; then
            $LOGGER -t $PROG "Exiting the $subSystem startup, reduction in VM configuration is detected; application can not be started.... "
            logEcho "Reduction in VM configuration is detected, application can not be started; exiting..."
            exit
        fi

        validateSWeMemForCfgProfileType
        if [ $? -ne 0 ]; then
            $LOGGER -t $PROG "Exiting the $subSystem startup,increase RAM to 18GB or more for large profile, application not started.... "
            exit
        fi
    fi

    # Start any third party applications on cloud
    if [ -e $THIRD_PARTY_APPS_TXT ] && [ -d /sys/fs/cgroup/cpuset/sig ]; then
        $RUN_THIRD_PARTY_APPS_SH -b >> $startupLog 2>&1
    fi

    # Use higher resource limits - the new limits are not in-effect right after reboot
    # (the previous open-files limit is saved and restored further down)
    oldmaxFiles=`ulimit -HSn`
    maxFiles=`$SYSCTL -n fs.file-max`
    ulimit -HSn $maxFiles

    # Remove marker file if needed
    removeRebootMarkerFile
    removeSoftRestartMarkerFile
    updatePktPortSpeed

    # If server is rebooted in the middle of the upgrade
    # that will be treated as an error case and we'll revert to previous version
    # If an un-intended reboot happens while in the middle of upgrade or before commit,
    # the upgrade needs to be re-tried.
    checkSoftwareUpgrade
    
    if [[ "$hostSubType" == "virtualContainer" && "$CNF_PRIVILEGE_MODE" == "false" ]]; then
        $LOGGER -t $PROG "Skipping Kernel parameter modifications for non-privileged CNF system..."
    else
        # Configure kernel runtime parameters...
        $SYSCTL -e -p /etc/sysctl.conf > /dev/null 2>&1

        # Set app solicit for all interfaces...
        for f in $($FIND /proc/sys/net -name 'app_solicit'); do
            $ECHO "0" > $f
        done

        # Always use the best local address for this target.
        $ECHO "2" > /proc/sys/net/ipv4/conf/all/arp_announce

        # eliminate ARP flux when multiple interfaces are on the same subnet
        $ECHO "1" > /proc/sys/net/ipv4/conf/all/arp_ignore

        # Accept gratuitous arp frames ; 1: accept GARP; 3: accept GARP and IPv6 Unsol Neigh Adv.
        $ECHO "3" > /proc/sys/net/ipv4/conf/all/arp_accept

        # Disable IPv6 router advertisement acceptance
        $ECHO "0" > /proc/sys/net/ipv6/conf/default/accept_ra

        # Ignore IPv6 Redirect messages
        $ECHO "0" > /proc/sys/net/ipv6/conf/default/accept_redirects

        # Keep IPv6 global addresses as VRF enslavement changes
        $ECHO "1" >  /proc/sys/net/ipv6/conf/all/keep_addr_on_down
    fi

    # generate AES and DES encryption keys if they are not present
    # (the key generator runs when $confdDotConf still contains "CFG_KEY_0")
    if $GREP -q "CFG_KEY_0" $confdDotConf
    then
        $GENERATE_ENC_KEYS_SH
    fi

    # pre-create /var/log/sonus/tmp/cnxipm with proper ownership such that EMA
    # need not create it with wrong ownership
    $MKDIR -p $sonusTmpDir/cnxipm/pool
    $CHOWN -R www-data:sonus $sonusTmpDir/cnxipm
    $CHMOD -R 770 $sonusTmpDir/cnxipm

    # NOTE: startup common to all types of startup (like oracle, resources, etc) is
    # taken care of in the sbxCleanup.sh script so that it takes affect for manual
    # as well as AMF initiated starts (failover, switchover)
    # HOWEVER, try to start openhpid here since if it won't start we don't want
    # to continue with startup and we cannot cleanly stop it from sbxCleanup.sh.
    # NOTE: it has been taking somewhere around 1 min with BMC 3.14

    # tasksetArguments stays empty unless a taskset prefix is built below; it
    # is expanded unquoted in front of the start command.
    tasksetArguments=""
    if [[ "$hostSubType" != "virtualContainer" ]]; then
        # NOTE(review): the openhpid check only runs when isCnf prints a
        # non-zero value - confirm this is the intended condition.
        ret=$(isCnf)
        if [ $ret != 0 ]; then
            openhpidCheck 90 1 /dev/null
            if [ $? -ne 0 ] ; then
                openhpidRestart /dev/null skip
                if [ $? -ne 0 ]; then
                    logEcho "openhpi service won't start, application can not be started; exiting..."
                    RETVAL=1
                    return $RETVAL
                fi
            fi
        fi
    else
        # Container environment.
        # Need to isolate all the application processes from the media cores.
        # NOTE(review): the command substitution below is used as a command,
        # i.e. anything is_functional_test_deployment prints would be executed;
        # $? is the function's status only when it prints nothing.
        $(is_functional_test_deployment)
        if [ $? == 1 ]; then
            sigCores=$(get_sig_cores "/var/log/syslog")
            if [[ $? -ne 0 ]]; then
                logEcho "Falied get the list of signaling cores"
            else
                logEcho "Got the signaling cores as - $sigCores"
                tasksetArguments="$TASKSET -ac $sigCores "
            fi
        fi
    fi

    # Start SAF process: GoAhead srp or Openclovis asp...
    # NOTE(review): $ASP_START_OPTIONS is expanded unquoted; any redirection
    # operators stored inside that variable are passed to $ASP as literal
    # arguments, because redirections are recognised before expansion.
    $ECHO "Starting $progAsp: "
    if [ $lsbtype == "base" ] ; then
        if $tasksetArguments $START_STOP_DAEMON --start --quiet --exec $ASP -- $ASP_START_OPTIONS; then
            log_success_msg
        else
            log_failure_msg
            RETVAL=1
        fi
    else
        if daemon $ASP $ASP_START_OPTIONS; then
            success
        else
            failure
            RETVAL=1
        fi
    fi
    
    if [[ "$hostSubType" != "virtualContainer" ]]; then
        # Move bfdd process to sig cset
        # Note: Performing cset move operation even if the bfdd process is already in sig cset using --force option
        # NOTE(review): when pgrep finds no bfdd process the -p option is left
        # without a pid list.
        logEcho "Moving bfdd process to sig cset."
        $CSET proc -m -p $($PGREP bfdd) -t sig --force
        if [ $? -eq 0 ]; then
            logEcho "bfdd process moved to sig cset"
        else
            logEcho "Failed to move bfdd process to sig cset"
        fi
    fi

    # This marker is removed by sbxCleanup if it runs successfully
    # If the marker exists, it means sbxCleanup was not successful
    # Therefore do not let sbc to come up
    if [ -f $SBX_CLEANUP_MARKER ]; then
       logEcho "sbx cleanup was not succesful, check sbx_start and sbx_stop log for errors"
       zap
    fi

    # Restore initial limits
    ulimit -HSn $oldmaxFiles

    # Remove reboot due to NP load error marker file
    $RM -f $npLoadIssueRebootMarkerFile

    # Container only, after a successful start: set the setgid bit on the
    # confd cdb directory, and group-write plus setgid on the confd log
    # directory.
    if [[ "$hostSubType" == "virtualContainer" ]]; then
        if [ $RETVAL -eq 0 ]; then
            $CHMOD g+s $SONUS_TAILF_CONFD_DIR/cdb
            $CHMOD g+ws $SONUS_TAILF_CONFD_DIR/log
        fi
    fi

    return $RETVAL
}

# Common implementation behind stop() and zap().
# Arguments:   $1 - "stop" or "zap"; selects the message verb and the option
#                   string handed to $ASP
# Globals set: mode, action, options, hwType, RETVAL
# Returns:     $RETVAL - 0 when $ASP succeeded, 1 when it failed, unchanged
#              when $ASP does not exist
stopZap() {
    mode=$1
    if [ "$mode" = "stop" ]; then
       action="Stopping"
       options=$ASP_STOP_OPTIONS
    elif [ "$mode" = "zap" ]; then
       action="Zapping"
       options=$ASP_ZAP_OPTIONS
    fi

    hwType="Unknown"
    # NOTE(review): existence is tested on $hostTypeFile while the content is
    # read from $HOST_TYPE - presumably the same file; verify.  Both are left
    # unquoted on purpose so the behaviour of this lookup is unchanged.
    if [ -e $hostTypeFile ]; then
        hwType=$($CAT $HOST_TYPE)
    fi

    if [ -f "$ASP" ]; then
        $LOGGER -t "$PROG" "$action service $productVersion"
        $ECHO "$action $progAsp: "
        # RETVAL must reflect the outcome of $ASP itself.  With the
        # 'cmd && ok || fail' form the value captured afterwards is the
        # status of the message helper, and 'fail' would also run when 'ok'
        # returned non-zero.
        # $options is deliberately unquoted: it holds several words.
        if $ASP $options; then
            if [ "$lsbtype" == "base" ]; then
                log_success_msg
            else
                success
            fi
            RETVAL=0
        else
            if [ "$lsbtype" == "base" ]; then
                log_failure_msg
            else
                failure
            fi
            RETVAL=1
        fi
    fi

    # remove any leader election info previously left behind
    # note: it is not removed for failure/software based restarts, hence
    # it is here instead of in sbxCleanup.sh.
    # note: also remove the 'error' file the findServiceUpTime script
    # creates when getting the peer file
    # The glob is only appended when the prefix is non-empty; an empty prefix
    # would otherwise expand to '*' and delete every file in the current
    # directory.
    $RM -f "$splitBrainMarker" ${peerSplitBrainMarker:+"$peerSplitBrainMarker"*}

    # this is done via model cleanup script, but we remove it here
    # since if the apps were still running, the cleanup script doesn't
    # bother to remove it.
    $RM -f "$varLockDir/$subSystem"

    # NOTE: cleanup common to all types of shutdown (like oracle, resources, etc) is
    # taken care of in the sbxCleanup.sh script so that it takes affect for manual
    # as well as AMF initiated stops (failover, switchover)

    return $RETVAL
}

# Stop the service: thin wrapper that runs stopZap in "stop" mode and
# passes its status through.
stop() {
    stopZap "stop"
}

#
# zap - forcefully take the SAF platform down ahead of a reboot.
#
# Runs stopZap in "zap" mode and deletes the start/stop and status
# exclusivity marker files.  On a 1to1 HA node that is neither virtualCloud
# nor virtualContainer it then releases the DRBD mirror: processes using the
# mounted drbd file system are killed, the file system is unmounted, checked
# with fsck, and the drbd service is stopped.  Progress is appended to
# $DRBD_ZAP_LOG.
# There is no explicit return; the status is that of the last command run.
#
zap() {
    stopZap zap

    # for some reason, we are not always cleaning out the /tmp file system.  since
    # zap is used ONLY to indicate a reboot, make sure we clean up our marker files
    # so we don't have exclusivity issues after reboot.
    $RM -f $stateChangeMarkerFile
    $RM -f $statusReqMarkerDir/${statusReqMarkerBase}*

    if [[ "$hostSubType" != "virtualCloud" && "$haMode" == "1to1" && "$hostSubType" != "virtualContainer" ]]; then
       # log a timestamp of when we are doing this: note this matches sma code!
       # NOTE(review): whether the "\n" below becomes a newline depends on how
       # $ECHO treats backslash escapes - confirm against its definition.
       drbdTime=`$DATE +"%n%a %b %d %H:%M:%S.%N %Z %Y"`
       $ECHO "$drbdTime\n`$BASENAME $0` called due to reboot (zap)" >> $DRBD_ZAP_LOG
       # kill any process using the drbd mounted file system
       # and then unmount it
       mounted="FALSE"
       $MOUNT | $EGREP "drbd" | $GREP -q -v grep 
       if [ $? -eq 0 ]; then
          # third field of the matching mount line(s) - presumably the mount
          # point; awk's printf adds no separator between multiple matches
          drbdDir=`$MOUNT | $EGREP "drbd" | $AWK {'printf $3'}`
          $ECHO "$0 drbd partition $drbdDir mounted" >> $DRBD_ZAP_LOG
          $FUSER -m $drbdDir -n file -s -k > /dev/null 2>&1
          $SLEEP 1
          mounted="TRUE"
       fi

       # check drbd status, a return of 3 indicates not loaded
       $SERVICE_SH drbd status >> $DRBD_ZAP_LOG 2> /dev/null 
       rtnDrbdStatus=$?
       if [ $rtnDrbdStatus -ne 3 ]; then  
          $ECHO "`$BASENAME $0` drbd service active" >> $DRBD_ZAP_LOG
          $DRBDADM disconnect mirror  >> $DRBD_ZAP_LOG 2>&1

          if [ $mounted == "TRUE" ]; then 
             # unmount, re-checking once per second; give up once the
             # attempt counter exceeds 120
             unMntCnt=1
             $UMOUNT /dev/drbd0 >> $DRBD_ZAP_LOG 2>&1
             while [ 1 ] 
             do
               $SLEEP 1
               $MOUNT | $GREP -e drbd | $GREP -q -v grep 
               if [ $? -eq 0 ]; then
                  $ECHO " `$BASENAME $0` Drbd FS still mount, retry unmount" >> $DRBD_ZAP_LOG 2>&1
                  $UMOUNT /dev/drbd0 >> $DRBD_ZAP_LOG 2>&1
               else 
                  break;
               fi
               unMntCnt=`$EXPR $unMntCnt + 1`
               if [ $unMntCnt -gt 120 ]; then 
                  $ECHO " `$BASENAME $0` Drbd unmount Faile $unMntCnt continuing " >> $DRBD_ZAP_LOG 2>&1
                  break;
               fi 
             done
          fi
       elif [ $mounted == "TRUE" ]; then 
          # drbd reports "not loaded" but the file system is still mounted
          $UMOUNT /dev/drbd0 >> $DRBD_ZAP_LOG 2>&1
       fi

       if [ $rtnDrbdStatus -ne 3 ]; then  
          # Timing we could have reconnected, disconnect again before state change
          $DRBDADM disconnect mirror >> $DRBD_ZAP_LOG 2>&1
          $DRBDADM primary mirror >> $DRBD_ZAP_LOG 2>&1

          #check or check and repair the drbd file  system
          $ECHO "`$BASENAME $0` $FSCK -y  /dev/drbd0" >> $DRBD_ZAP_LOG
          $FSCK -y  /dev/drbd0  >> $DRBD_ZAP_LOG 2>&1
          rtnFsck=$?
          #8 - Operational error
          if [ $rtnFsck -eq 8 ]; then
             # retry fsck with each backup superblock listed by $DUPME2FS
             # until one attempt returns 0 or 1
             for backUpBlock in `$DUPME2FS  /dev/drbd0  | $GREP  "Backup superblock" | $AWK -F "," '{print $1}' | $AWK '{print $4}'`
             do
               $FSCK -b $backUpBlock -y  /dev/drbd0  >> $DRBD_ZAP_LOG 2>&1
               rtnFsck=$?
               #0 - No errors
               #1 - File system errors corrected
 
               if [ $rtnFsck -eq 0 -o $rtnFsck -eq 1 ]; then
                  break
               else 
                  $ECHO "`$BASENAME $0` drbd fsck returned $rtnFsck for $backUpBlock next backup superblock" >> $DRBD_ZAP_LOG
               fi
             done

             #4 - File system errors left uncorrected
             #8 - Operational error
             #1 - File system errors corrected
             #0 - No errors
             if [ $rtnFsck -eq 4 -o $rtnFsck -eq 8 ]; then
                $ECHO "`$BASENAME $0` drbd partition /dev/drbd0 could not be repaired, continuing" >> $DRBD_ZAP_LOG
             fi
          fi          

          # stop drbd since the OS is going down
          # NOTE(review): "3>&1" duplicates stdout onto fd 3; every other
          # line here uses "2>&1" - looks like a typo, confirm.
          $ECHO "`$BASENAME $0` Terminating DRBD" >> $DRBD_ZAP_LOG 3>&1
          $SLEEP 1
          $DRBDADM secondary mirror  >> $DRBD_ZAP_LOG 2>&1
          $DRBDADM detach mirror  >> $DRBD_ZAP_LOG 2>&1
          $SERVICE_SH drbd stop >> $DRBD_ZAP_LOG 2>&1
       fi
    fi
}

# If sbx is getting restarted as part of the server reboot,
# then check for the revert marker file and revert to previous version.
# The revert process removes the marker file.
# Globals read: startingAfterReboot, softwareUpgradeCommitPendingFile,
#               revertScript, revertLogFile, stateChangeMarkerFile,
#               dynamicHARequiredFile
# Globals set:  RETVAL (0, just before exiting after a revert)
# Returns:      0 when nothing has to be reverted; when the revert script is
#               missing, the status of the logger call.  Does NOT return
#               after launching a revert: the script exits.
function checkSoftwareUpgrade
{
  # Only a start that follows a reboot with a commit still pending matters
  [[ "$startingAfterReboot" == "true" ]] || return 0
  [[ -f "$softwareUpgradeCommitPendingFile" ]] || return 0

  if [[ ! -f "$revertScript" ]]; then
    # Append (not truncate): the revert log may hold output of earlier
    # revert attempts that must not be wiped out.
    $LOGGER -t "$PROG" "Revert script: $revertScript doesn't exist" >> "$revertLogFile" 2>&1
    return
  fi

  $LOGGER -t "$PROG" "Reverting previous upgrade..."
  # sbxRevert.pl is going to call 'service sbx stop'.  However, the stop will fail
  # due to the exclusivity lock.  So remove the lock here so that an auto revert
  # is still possible.  Note that we would not have gotten here unless we created
  # the marker, so we can just delete it. (i.e. a state change collision would
  # not reach here...)
  $RM -f "$stateChangeMarkerFile" > /dev/null 2>&1
  # since we are stopping LSWU, remove potentially existing dynamic HA marker file
  # so that we prevent model update functionality on the next start
  $RM -fr "$dynamicHARequiredFile" > /dev/null 2>&1
  # the commit pending file itself is removed as part of the revert scripts
  $PERL "$revertScript" -r error -c internalError -i >> "$revertLogFile" 2>&1
  RETVAL=0
  exit
}

# Maintain the reboot counter kept in "rebootCountMarkerFile".
# When this start follows a reboot the counter is incremented; for any other
# start (e.g. the user issued 'service sbx start') the marker is removed so
# that the count towards the maximum number of reboots starts over.
# TBD: This works for debian and needs some tuning for other
# linux distributions.
removeRebootMarkerFile() {
    local countAction="inc"

    if [ $startingAfterReboot != "true" ]; then
        countAction="rm"
    fi
    alterRebootAndSoftRestartCount $countAction "rebootCountMarkerFile"
}

# Reset the soft-restart counter by removing "softRestartCountMarkerFile".
# This runs both when the system was rebooted and when the user issued
# 'service sbx start'; in either case the number of restarts starts over.
removeSoftRestartMarkerFile() {
    local markerName="softRestartCountMarkerFile"

    alterRebootAndSoftRestartCount rm "$markerName"
}

# Reconcile the <pktPortSpeed> stored in one XML file with the packet port
# speed learned from GPIO.
# Arguments:   $1 - XML file to inspect (a missing or empty path is a no-op)
#              $2 - GPIO speed value ("speed1Gbps" or "speed10Gbps")
# Globals set: updated - set to "true" when the file was rewritten.  It is
#              deliberately NOT declared local here so that the variable of
#              the calling function is the one being modified.
# Side effect: sed keeps the previous content in "<file>.orig".
# Returns:     0
function checkAndUpdateXmls()
{
  local xmlFile=$1
  local gpioValue=$2
  local currentSpeed newSpeed

  # Quoted so that an empty path is simply "not there" and paths containing
  # whitespace are handled as a single file name.
  [[ -e "$xmlFile" ]] || return 0

  # Work out which speed the file currently holds and what it would flip to
  if $GREP -q "<pktPortSpeed>speed10Gbps</pktPortSpeed>" "$xmlFile"; then
    currentSpeed="speed10Gbps"
    newSpeed="speed1Gbps"
  elif $GREP -q "<pktPortSpeed>speed1Gbps</pktPortSpeed>" "$xmlFile"; then
    currentSpeed="speed1Gbps"
    newSpeed="speed10Gbps"
  else
    return 0
  fi

  # File and GPIO already agree
  if [[ "$gpioValue" == "$currentSpeed" ]]; then
    return 0
  fi

  $LOGGER -t "$PROG" "Updating packet port speed from ${currentSpeed#speed} to ${newSpeed#speed} in $xmlFile"
  $SED -i".orig" -e "s/<pktPortSpeed>${currentSpeed}<\/pktPortSpeed>/<pktPortSpeed>${newSpeed}<\/pktPortSpeed>/" "$xmlFile"
  updated="true"
}

function updatePktPortSpeed()
{
  # Reconcile the PKT port speed learned from GPIO (updated by CHS service)
  # with the setting stored in the CDB XML files.  When they differ the XML
  # files are rewritten and the databases are removed so that the new
  # settings are loaded on startup.
  # All previous configuration will be lost.
  #
  # Globals: gpioValue (written) - content of $ETC_PKT_PORT_SPEED
  # Returns: 1 when the speed file, the metadata .orig file or the port
  #          XML template for the selected speed is missing; otherwise the
  #          status of the last command run (0 when nothing had to change).
  local pktPortSpeedFile=$ETC_PKT_PORT_SPEED
  local cdbFile="$SONUS_TAILF_CDB_DIR/sonusSystem.xml"
  local sonusSystemSbyDotXml="$SONUS_TAILF_CONFD_DIR/sonusSystemSby.xml"
  local sonusSystemSbyDotXmlLocal="$SONUS_TAILF_CONFD_DIR/sonusSystemSby.xml.local"
  local sonusMetadataDotXml="$SONUS_TAILF_CDB_DIR/sonusMetadata.xml"
  local sonusPortDotXml="$SONUS_TAILF_CDB_DIR/sonusPort.xml"
  local sonusPortDotXml1G="$SONUS_TAILF_CDB_DIR/sonusPort.xml.1G"
  local sonusPortDotXml10G="$SONUS_TAILF_CDB_DIR/sonusPort.xml.10G"
  # checkAndUpdateXmls flips this to "true" when it edits a file
  local updated="false"
  local portXmlTemplate

  [ -e $pktPortSpeedFile ] || {
    $LOGGER -t $PROG "Pkt port speed file not found: $pktPortSpeedFile, exiting"
    return 1
  }
  gpioValue=$($CAT $pktPortSpeedFile)

  checkAndUpdateXmls $cdbFile $gpioValue
  checkAndUpdateXmls $sonusSystemSbyDotXml $gpioValue
  checkAndUpdateXmls $sonusSystemSbyDotXmlLocal $gpioValue

  # Nothing changed: leave metadata, port XML and databases alone.
  [ "$updated" == "true" ] || return 0

  $LOGGER -t $PROG "Updating xmls..."

  # Start again from the pristine (tagged) metadata file.
  [ -e $sonusMetadataDotXml.orig ] || {
    $LOGGER -t $PROG "$sonusMetadataDotXml.orig doesnt exist! Unable to update $sonusMetadataDotXml for $gpioValue mode..."
    return 1
  }
  $CP -f $sonusMetadataDotXml.orig $sonusMetadataDotXml

  if [ "$gpioValue" == "speed10Gbps" ]; then
    # Inside the BEGINpkt23..ENDpkt23 range keep only lines containing '*'
    $SED -i '/^BEGINpkt23/,/^ENDpkt23/{/*/!d}' $sonusMetadataDotXml
    portXmlTemplate=$sonusPortDotXml10G
  else
    portXmlTemplate=$sonusPortDotXml1G
  fi

  [ -e $portXmlTemplate ] || {
    $LOGGER -t $PROG "$portXmlTemplate doesnt exist! Unable to update $sonusPortDotXml for $gpioValue mode..."
    return 1
  }
  $CP -pf $portXmlTemplate $sonusPortDotXml

  #Remove all tags
  $SED -i -e '/^BEGIN/d' -e '/^END/d' $sonusMetadataDotXml

  $LOGGER -t $PROG "Packet port mode changed, removing databases..."
  $REMOVECDB_SH
  $REMOVE_SONUSDB_SH
}

# Restart the service: a full stop followed by a start.  The start is
# attempted regardless of how the stop went, and its status is returned.
restart() { stop; start; }

# prevent simultaneous issuances of start/restart/stop prior to the ASP
# being in a state to tell us if it is running or not.  The ASP also has
# support to prevent simultaneous execution of the script, but we may do other
# processing outside of the ASP script that should be guarded as well.  This also
# covers the case where the ASP status reports not running even though the watchdog
# is restarting it, or our scripts are being run prior to starting the asp processes.
# In addition, other scripts use 'status' to see if running, so we need to cover the
# window mentioned above where status would have said nothing is running.
checkExclusivity()
{
    # Serialize start/stop/restart against each other and against 'status'
    # by means of marker files.
    #
    # Arguments: $1 - $START_STOP_REQ or $STATUS_REQ
    # Globals written: returnVal, stateChangeReq, statusCount, loopCount,
    #            stateChangeRequest / statusRequest (read by cleanExit to
    #            know which marker to remove), stateChangeCollision, RETVAL
    # Returns:   1 when a status request finds a state change in progress,
    #            0 otherwise.  Exits the script on a start/stop collision.
    returnVal=0
    stateChangeReq=$1

    # count the status requests currently in progress
    statusCount=$($FIND $statusReqMarkerDir -name "${statusReqMarkerBase}.*" -print | $WC -l)

    # remember what kind of request this is (for the cleanup effort) and
    # account for my own status request
    if [ $stateChangeReq -eq $START_STOP_REQ ]; then
       stateChangeRequest=1
    elif [ $stateChangeReq -eq $STATUS_REQ ]; then
       statusRequest=1
       statusCount=$((statusCount+1))
    fi

    # exclusivity rules, in order of precedence:
    # 1) start/stop in progress: start/stop request is nacked and the script exits
    # 2) start/stop in progress: status request shows status but 'system not ready'
    # 3) status in progress: start/stop request waits out the status req and then continues on
    # 4) status request results in status check whether or not another status request is in progress
    # 5) start/stop request results in taking the start/stop/restart action
    if [ -e $stateChangeMarkerFile ] && [ $stateChangeReq -eq $START_STOP_REQ ]; then
       logEcho "Service start/restart/stop already in progress"
       stateChangeCollision=1
       RETVAL=1
       ### NOTE: we are exiting the script here!
       exit
    elif [ -e $stateChangeMarkerFile ] && [ $stateChangeReq -eq $STATUS_REQ ]; then
       $TOUCH $statusReqMarkerFile
       returnVal=1
    elif [ $statusCount -ne 0 ] && [ $stateChangeReq -eq $START_STOP_REQ ]; then
       # Give the running status request(s) time to finish before changing
       # state.  status takes roughly 5 seconds or so; still being here
       # after 20 one-second polls means something is seriously wrong.
       loopCount=0
       until [ $statusCount -eq 0 ] || [ $loopCount -ge 20 ]
       do
           $LOGGER -t $PROG "Waiting for status request to complete..."
           $SLEEP 1
           statusCount=$($FIND $statusReqMarkerDir -name "${statusReqMarkerBase}.*" -print | $WC -l)
           loopCount=$((loopCount+1))
       done
       if [ $statusCount -eq 0 ]; then
          $LOGGER -t $PROG "Status request ended: Continuing on with state change..."
       else
          $LOGGER -t $PROG "Status request never ended: Continuing on with state change anyway..."
       fi
       # The state change request is accepted at this point, so claim the
       # state change marker.
       $TOUCH $stateChangeMarkerFile
    elif [ $stateChangeReq -eq $STATUS_REQ ]; then
       # whether or not status in progress already, we have different files
       $TOUCH $statusReqMarkerFile
    elif [ $stateChangeReq -eq $START_STOP_REQ ]; then
       $TOUCH $stateChangeMarkerFile
    fi

    return $returnVal
}

# we use a special exit routine to make sure we cleanup the exclusivity lock
# no matter how we exit
cleanExit()
{
    # Release whichever exclusivity marker this invocation owns, then
    # leave the script with $RETVAL.
    #
    # Globals read: stateChangeCollision, stateChangeRequest, statusRequest,
    #               stateChangeMarkerFile, statusReqMarkerFile, RETVAL

    # A state change request owns the lock file only when it did not
    # collide; on a collision the lock belongs to the invocation we
    # collided with, which removes it when it exits.
    if [ $stateChangeCollision -eq 0 ] && [ $stateChangeRequest -eq 1 ]; then
       $RM -f $stateChangeMarkerFile > /dev/null 2>&1
    fi

    # A status request always removes its own marker.
    if [ $statusRequest -eq 1 ]; then
       $RM -f $statusReqMarkerFile > /dev/null 2>&1
    fi

    exit $RETVAL
}

# due to unmounting the log directory, we need to avoid running any
# script from the evlog dir when we will start/stop/restart the service.
# if we run it from the log directory, the terminal could be ripped out
# from under us.
# note: cannot use pwd since 'service' resets pwd to '/'
# note: fuser will return 'c' with the pid to indicate 'in that directory'
# note: fuser returns all pids if not mounted, so check first if mounted
checkLocation()
{
    # Refuse a service state change when the invoking (parent) process has
    # its working directory on a drbd mounted file system.
    #
    # Globals written: drbdDir, ppidLoc, evlogPid, RETVAL
    # Returns: 0 when the caller is elsewhere or no drbd mount exists;
    #          exits the script (RETVAL=1) otherwise.

    # Collect the mount point(s) of all drbd mounts, one per line.  'print'
    # is used rather than 'printf $3' so that the mount point is never
    # interpreted as a printf format string and several mount points are
    # not glued together into a single bogus path.
    drbdDir=$($MOUNT | $EGREP "drbd" | $AWK '{print $3}')

    # fuser returns all pids if not mounted, so only ask when mounted
    if [ -z "$drbdDir" ]; then
       return 0
    fi

    # fuser marks a process whose current directory is on the file system
    # with a trailing 'c' after the pid
    ppidLoc="${PPID}c"
    for evlogPid in $($FUSER -m $drbdDir -n file 2>&1 | $AWK -F: '{print $2}')
    do
      if [ "$evlogPid" == "$ppidLoc" ]; then
         logEcho "Service state change not allowed from a drbd mounted directory.  Please leave the directory and try again."
         RETVAL=1
         exit
      fi
    done

    return 0
}

# prevent starting if in FIPS critical error state
checkFipsCriticalError()
{
    # Abort the script when FIPS mode is enabled in $SBXCONF_FILE and the
    # first line of /var/lib/fips/state reports a critical error.
    #
    # Globals written: fipsModeEnabled, selftest, criticalError, entries,
    #                  RETVAL (set to 1 before exiting)
    fipsModeEnabled="$($GREP "fipsMode=enabled" $SBXCONF_FILE)"
    if [ -z "$fipsModeEnabled" ]; then
        # FIPS mode is off: nothing to check
        return 0
    fi

    # A self test may still be running; give it a fixed 60 seconds and
    # then read the state again below.
    selftest=$($HEAD -1 /var/lib/fips/state | $EGREP "^selftest")
    if [ -n "$selftest" ]; then
        logEcho "In FIPS-140-3 selftest state. Wait for 60 sec for it to complete."
        $SLEEP 60
    fi

    criticalError=$($HEAD -1 /var/lib/fips/state | $EGREP "^criticalerror")
    if [ -z "$criticalError" ]; then
        return 0
    fi

    logEcho "In FIPS-140-3 critical error state. SBX cannot be started. May require reboot to clear the critical error state."
    # show the first lines of the state file for diagnosis
    entries="$($HEAD -5 /var/lib/fips/state)"
    logEcho "$entries"
    RETVAL=1
    exit
}

# prevent starting if diagnostics are running
checkDiags()
{
    # Abort the script (RETVAL=1) when a testAppDiagConnexip5200 process
    # is found, i.e. while diagnostics are running.
    #
    # Globals written: pid, RETVAL
    pid="$(__pids_pidof testAppDiagConnexip5200)"
    if [ -z "$pid" ]; then
        return 0
    fi

    logEcho "Service state cannot be changed while diagnostics are running"
    RETVAL=1
    exit
}

# prevent starting if loadConfig is not complete
checkLoadConfig()
{
    # Abort the script (RETVAL=1) while a loadConfig has not completed.
    #
    # Two marker files are created when loadConfig starts:
    #  - $LOAD_CONFIG_TMP_MARKER_FILE lives in a tmp path and identifies a
    #    loadConfig that is in progress right now.
    #  - $LOAD_CONFIG_MARKER_FILE persists even across a reboot until
    #    loadConfig completes; finding it without the tmp marker means
    #    something went wrong during loadConfig.
    local loadConfigMsg

    [ -f $LOAD_CONFIG_MARKER_FILE ] || return 0

    if [ -f $LOAD_CONFIG_TMP_MARKER_FILE ]; then
        loadConfigMsg="Load Config is in progress, Please wait. SBC Application will be started automatically"
    else
        loadConfigMsg="Cannot start SBC as the loadConfig is not Complete. System is in unstable state, Please contact Ribbon Communication customer care for support."
    fi
    logEcho "$loadConfigMsg"

    RETVAL=1
    exit
}

validateDsbcPersonality()
{
    # Refuse to start when the peer CE is configured with a different DSBC
    # personality than this node.
    #
    # Globals read: PERSONALITYTYPE_TXT, SBXCONF_FILE, SONUS_PEER_CNX_EXPECT,
    #               startupLog
    # Globals written: RETVAL (set to 1 before exiting on a mismatch)
    #
    # The peer value is only trusted when it is one of the known
    # personalities (isbc, ssbc, msbc, mrfp, slb); anything else (for
    # example an unreachable peer) does not block the start.
    local dsbcfile="$PERSONALITYTYPE_TXT"
    local dsbcpersonality=`$CAT $dsbcfile`
    # the conf file carries the line "peerCeName=none" when there is no peer
    local hapair="$($GREP "peerCeName=none" $SBXCONF_FILE)"
    if [[ "$hapair" != "peerCeName=none" ]]; then 
        # read the peer's personality file and strip whitespace and the
        # "Loginfailed" text from what comes back
        local peerdsbcpersonality=`$SONUS_PEER_CNX_EXPECT $CAT $dsbcfile 2> /dev/null`
        peerdsbcpersonality="$($ECHO -e "${peerdsbcpersonality}" | $TR -d '[[:space:]]')"
        peerdsbcpersonality=${peerdsbcpersonality/Loginfailed/}
        if [[ "$dsbcpersonality" != "isbc" && "$peerdsbcpersonality" != "isbc" ]]; then
          logEcho "DSBC Personality: $dsbcpersonality, Peer DSBC Personality: ${peerdsbcpersonality:-unknown}"
        fi
        if [ "$dsbcpersonality" != "$peerdsbcpersonality" ] && [[ "$peerdsbcpersonality" == "isbc" || "$peerdsbcpersonality" == "ssbc" || "$peerdsbcpersonality" == "msbc" || "$peerdsbcpersonality" == "mrfp" || "$peerdsbcpersonality" == "slb" ]]; then
            # Record the reason once, appending so that anything already
            # written to the startup log is kept.  (Previously the message
            # was appended and then immediately rewritten with '>', which
            # truncated the log.)
            $ECHO -e "DSBC Personality: $dsbcpersonality, Peer DSBC Personality: $peerdsbcpersonality \nPeer DSBC Personality Mismatch, unable to start sbx service" >> $startupLog
            logEcho "Peer DSBC Personality Mismatch, unable to start sbx service"
            RETVAL=1
            exit
        fi
    else
        logEcho "DSBC Personality: $dsbcpersonality, no peer"
    fi
}

updateNodeId()
{
    # Refresh the RGM_NODE_ID environment entry with the node id that
    # serfGetSelfParams.py reports as 'assignedSelfNodeId': the old entry
    # is removed first, then the new value is added.
    #
    # Globals written: nodeId
    nodeId=$($PYTHON3 $installRoot/serf/serfGetSelfParams.py assignedSelfNodeId)

    $REMOVEENV_PL -n RGM_NODE_ID
    $ADDENV_PL -n RGM_NODE_ID -v $nodeId
}

# Check if booted off P2 to do install/upgrade
# If yes, we need to kickoff install/upgrade based on Marker file
checkForUpgradeRevert()
{
    # Kick off a pending install/upgrade (in the background) when one of
    # the marker files asks for it.
    #
    # Globals read:    installUpgradeMarker, upgradeMarker, stagingDir,
    #                  installUpgradeCmdArray (allowed command prefixes)
    # Globals written: case, installUpgradeCmd, currDir, upgradeType, i
    # Returns: 0 when no marker is present (normal startup may continue);
    #          1 when a marker was found, whether the install/upgrade was
    #          launched or the marker content was rejected.
    local found=false
    local processLabel
    local trimmedCmd
    local upgradeOutFile

    # Check for Install/Upgrade Marker file
    if [ -f $installUpgradeMarker ]; then
        case=$($GREP "case=" $installUpgradeMarker | $AWK -F= '{print $2}')
        installUpgradeCmd=$($GREP "command=" $installUpgradeMarker | $AWK -F= '{print $2}')

        # map the requested case onto the wording used in the log message
        if [ $case == "INSTALL" ]; then
            processLabel="Installation"
        elif [ $case == "UPGRADE" ]; then
            processLabel="Upgrade"
        elif [ $case == "LSWU" ]; then
            processLabel="LSWU"
        else
            $LOGGER -t $PROG "ERROR: Unknown option provided, cant resolve Install/Upgrade state.. Provided case=$case from marker=$installUpgradeMarker"
            return 1
        fi
        $LOGGER -t $PROG "Invoking SBX $processLabel process (in background) with command [$installUpgradeCmd]... "

        currDir=$PWD
        cd $stagingDir

        # Only commands that start with one of the allowed prefixes may be
        # run; trailing blanks of the requested command are ignored.
        trimmedCmd="`echo $installUpgradeCmd | sed 's/ *$//g'`"
        for i in "${installUpgradeCmdArray[@]}"
        do
            if [[ "$trimmedCmd" == "$i"* ]]; then
                found=true
                break
            fi
        done
        if [ "$found" != true ] ; then
            $LOGGER -t $PROG "ERROR: Unknown command provided, cant proceed with install/Upgrade command.. Provided command=$installUpgradeCmd for case=$case from marker=$installUpgradeMarker"
            return 1
        fi
        $installUpgradeCmd 2>&1 &

        cd $currDir
        # Move Install/Upgrade Marker File to its backup
        $MV -f $installUpgradeMarker $installUpgradeMarker.backup
        return 1
    fi

    if [ -e $upgradeMarker ]; then
        upgradeType=$($CAT $upgradeMarker | $GREP "case=" | $AWK -F "case=" '{print $2}')
        $RM -f $upgradeMarker.backup
        $MV -f $upgradeMarker $upgradeMarker.backup
        $LOGGER -t $PROG "Invoking reconfigHw.pl (in background) for upgrade case $upgradeType..."
        # offline upgrades log to a different file than the other cases
        upgradeOutFile=$UPGRADE_OUT
        if [ $upgradeType == "OFFLINE_UPGRADE" ]; then
           upgradeOutFile=$SBX_UPDATE_OUT
        fi
        $PERL $RECONFIG_HW_PL -u $upgradeType 2>&1 | $TEE -a $upgradeOutFile &
        return 1
    fi

    return 0
}

# This function will generate down event for self node
generateDownEvent()
{
    # Announce this node as 'down' through a serf user event built from
    # the node's own serf parameters.
    #
    # Globals written: node, nodeId, serviceId, haIP, assignedRole,
    #                  currentRole, serfTimeStamp, hV, pV, oV, appVersion
    local -A serfData
    getAllSerfParams serfData

    local downEventTag
    local downEventPayload

    # identity and role of this node
    node="${serfData[nodeName]}"
    nodeId="${serfData[assignedSelfNodeId]}"
    serviceId="${serfData[assignedSelfServiceId]}"
    haIP="${serfData[haIp]}"
    assignedRole="${serfData[assignedRole]}"
    currentRole="${serfData[currentRole]}"
    serfTimeStamp=$($DATE +%s)

    # version fields passed through unchanged
    hV="${serfData[hV]}"
    pV="${serfData[pV]}"
    oV="${serfData[oV]}"
    appVersion="${serfData[appVersion]}"

    downEventTag=$($BASENAME $0)
    $LOGGER -t $downEventTag "Generating down event for node [$node], nodeId [$nodeId], serviceId [$serviceId], HA IP [$haIP], assignedRole [$assignedRole], timeStamp [$serfTimeStamp] ..."

    # the event always reports currentRole=down and syncCompleted=false
    downEventPayload="nodeName=$node,assignedRole=$assignedRole,currentRole=down,nodeId=$nodeId,serviceId=$serviceId,ha0IP=$haIP,syncCompleted=false,timeStamp=$serfTimeStamp,pV=$pV,oV=$oV,hV=$hV,appVersion=$appVersion"
    $installRoot/serf/serf event -coalesce=false down "$downEventPayload"
}

# This function will send the switchover event to peer i.e. standby node if 
# this node is currently running in active role.
checkAndSendSwitchoverEvent() {
    # Send the switchover event to the peer, but only when the serviceId
    # marker file says this node's current role is 'active' and its
    # serviceId is not 99.
    #
    # Globals read:    NODE_SERVICEID_MARKER_FILE, SBC_INTF_SH,
    #                  SERF_GET_SELF_PARAMS_PY
    # Globals written: nodeName, currentRole, serviceId
    local switchoverOutput

    nodeName=`$PYTHON3 $SERF_GET_SELF_PARAMS_PY node_name`

    # Send the switchover event only in case if self current role is 'active'.
    if [ -e "$NODE_SERVICEID_MARKER_FILE" ];then
        currentRole=$($GREP "currentRole=" "$NODE_SERVICEID_MARKER_FILE" | $AWK -F "=" '{print $2}')
        serviceId=$($GREP "serviceId=" "$NODE_SERVICEID_MARKER_FILE" | $AWK -F "=" '{print $2}')
        if [[ "$currentRole" == "active" && "$serviceId" != "99" ]]; then
           $LOGGER -t `$BASENAME $0` "Sending the switchover event to Standby with self node name [$nodeName] and self serviceId [$serviceId]..."
           # Run the interface script and capture what it prints.  It must
           # not be wrapped in bare backticks as a statement: that makes
           # the shell execute the script's output as a second command.
           switchoverOutput=$($SBC_INTF_SH switchover $nodeName $serviceId)
           if [ -n "$switchoverOutput" ]; then
              $LOGGER -t `$BASENAME $0` "Switchover script output: $switchoverOutput"
           fi
           $LOGGER -t `$BASENAME $0` "Switchover event sent."
        else
           $LOGGER -t `$BASENAME $0` "Not sending switchover event. Role from marker: $currentRole, serviceId from marker:$serviceId"
        fi
    else
        $LOGGER -t `$BASENAME $0` "ServiceId marker does not exist. So not sending switchover event."
    fi
}

# This function will check for model update marker on peer node.
# If present on peer node, it will create stub processes and model update markers on self node for revert case.
checkAndSetupModelForRevert() {
    # Fetch the model update marker from the peer node.  When the peer has
    # one, stub every listed component that does not exist locally and
    # create the model update markers on this node (revert case).
    #
    # The marker is read as key=value lines; the value is the component
    # process name under $SONUS_OPENCLOVIS_BIN_DIR.
    #
    # Globals written: retVal
    # Returns: 0 when the peer has no marker or it could not be fetched.
    $SONUS_PEER_CNX_EXPECT $CAT $PEER_DYNAMIC_HA_NEW_COMPS > $DYNAMIC_HA_NEW_COMPS
    retVal=$?
    if [ $retVal -eq 0 ]; then
        # a missing file on the peer shows up as the error text of cat
        $GREP -q "No such file or directory" $DYNAMIC_HA_NEW_COMPS
        if [ $? -eq  0 ]; then
            $LOGGER -t $PROG "Model update marker does not exist on peer node, continuing..."
            $RM -f $DYNAMIC_HA_NEW_COMPS
            return 0
        fi
    else
        $LOGGER -t $PROG "Unable to get model update marker from peer, return value: [$retVal], continuing..."
        $RM -f $DYNAMIC_HA_NEW_COMPS
        return 0
    fi

    while IFS="=" read -r compKey compValue; do
        # A blank line or a line without a value yields an empty component
        # name.  Skip it: otherwise the link target would be the bin
        # directory itself rather than a component.
        if [ -z "$compValue" ]; then
            $LOGGER -t $PROG "Ignoring model update entry without a component name"
            continue
        fi
        if [ ! -f "$SONUS_OPENCLOVIS_BIN_DIR/$compValue" ]; then 
            $LOGGER -t $PROG "Stubbing new component: $compValue"
            $LN -fs $SONUS_OPENCLOVIS_BIN_DIR/CE_2N_Comp_SmProcess.stub "$SONUS_OPENCLOVIS_BIN_DIR/$compValue"
        else
            $LOGGER -t $PROG "Component: $compValue already exists !!!"
        fi
    done < "$DYNAMIC_HA_NEW_COMPS"

    $LOGGER -t $PROG "Creating performModelUpdate with value: updateModel_dynha and selfModelUpdatePending for revert case."
    $ECHO "updateModel_dynha" > $dynamicHARequiredFile
    $CHOWN sonusadmin:sonus $dynamicHARequiredFile
    $ECHO "This marker will be removed by SM" > $SELF_MODEL_UPDATE_PENDING
    $CHOWN sonusadmin:sonus $SELF_MODEL_UPDATE_PENDING    

    # Remove dynamic HA marker files from self and peer node.
    $RM -f $DYNAMIC_HA_NEW_COMPS
    $SONUS_PEER_CNX_EXPECT $RM -f $PEER_DYNAMIC_HA_NEW_COMPS
}

#
# Output how we were called, and by whom, for traceability purposes
# Don't do it for status since that is constantly called and is not
# service impacting in any way.
#
case "$1" in
    status)
        # status is called constantly; do not flood the log with it
        ;;
    *)
        # record the requested action together with the ps entry of the
        # calling (parent) process
        parentProc=$($PS -ho pid,ppid,tty,user,args -p $PPID)
        $LOGGER -t $PROG "Called with arg [$1] by pid [$PPID]: $parentProc"
        ;;
esac

#
# See how we were called...
#
# Dispatch on the requested action ($1): start, stop, swe_crash, restart,
# status, reload, force-reload.  Anything else prints the usage text.
# RETVAL carries the exit status of the script (see the note at the
# bottom of the file).
case "$1" in
    start)
        # Truncate the startup log so it only holds this start attempt.
        $CAT /dev/null > $startupLog

        # check if starting up due to a reboot
        if [ -f $SBX_BOOT_AFTER_SYSTEM_REBOOT ];then
            $RM -f $SBX_BOOT_AFTER_SYSTEM_REBOOT 
            $LOGGER -t $PROG "Service starting after reboot (setting startingAfterReboot=true)"
            startingAfterReboot=true
        fi

        # prevent starting if in FIPS critical error state
        checkFipsCriticalError

        # prevent starting up while diags are running
        checkDiags

        # prevent starting if loadConfig is not complete
        checkLoadConfig

        # For non-5100 hardware, Check If we booted from P2
        hwType=$(getHwType)

        # If in the middle of reboot, prevent starting. Happens sometimes because of older releases of PM
        if [ -e /tmp/rebootInProgressKey.key ];then
           $LOGGER -t $PROG "Called with start and /tmp/rebootInProgressKey.key exists, returning..."
           exit 0
        fi

        # checkForUpgradeRevert returns non-zero when an install/upgrade
        # marker was found (the install/upgrade was launched in the
        # background or the marker was rejected); the application is not
        # started in that case.
        checkForUpgradeRevert
        if [ $? -ne 0 ]; then
           $LOGGER -t $PROG "Exiting the $subSystem startup, application not started.... "
           exit
        fi

        # Last output line of the cluster manager query; compared against
        # the string "False" below.
        isUpgradeInProgress=`$PYTHON3 $CLUSTER_MANAGER_PY 'checkIfSweUpgradeInProgress' | $TAIL -n1`

        # In case of SWe/Cloud 1:1 deployed using image (qcow2/vmdk), check for model update
        # marker on peer node if not present on self node.
        # If the marker is present on peer node, create model update markers on self for revert case.
        if [[ "$hwType" == "ConnexIP5000" && "$haMode" == "1to1" && $isLcaEnabled -eq 0 
              && ! -e "$DYNAMIC_HA_NEW_COMPS" && "$isUpgradeInProgress" == "False" ]];
        then
            $LOGGER -t $PROG "Checking for model update marker on peer node ..."
            checkAndSetupModelForRevert
        fi
         
        # Determine the CPU count: from $SWE_CPU_INFO_K8S when isCnf
        # reports non-zero, otherwise from nproc.
        # NOTE(review): cpu_count is not used again inside this case
        # statement; presumably it is consumed by start() - confirm.
        cnf=$(isCnf)
        if [ $cnf -ne 0 ]; then
            cpu_count=`$CAT $SWE_CPU_INFO_K8S`
        else
            cpu_count=`$NPROC --all`
        fi

        # On virtual platforms run the soft SBC config update script and
        # copy its output to the log.
        if [[ "$hostSubType" == "virtual" || "$hostSubType" == "virtualCloud" || "$hostSubType" == "virtualContainer" ]]; then
            updateSoftSbcConfigFile=`basename $UPDATE_SOFTSBC_CONFIG_PL`
            $LOGGER -t $PROG "Calling $updateSoftSbcConfigFile to setup number of SCMs, hostSubType: $hostSubType"
            scriptOutput=`$UPDATE_SOFTSBC_CONFIG_PL`
            $LOGGER -t $PROG "$scriptOutput"
            $LOGGER -t $PROG "Completed $updateSoftSbcConfigFile"
        fi

        if [[ "$hostSubType" == "virtual" && $startingAfterReboot == "true" ]];
        then
            # Check if second management port is configured on the system.
            isSecondMgmtPortConfigured
            secondMgmtPortRetVal=$?

            # In case of SWe 1:1 deployed using ISO and second management port is configured,
            # create XML seed data for second management port.
            if [[ $isLcaEnabled -eq 1 && $secondMgmtPortRetVal -eq 1 ]]; then
                # Call script to configure XML seed data for second management port.
                $CONFIGURE_SECOND_MGMT_PORT_SH
            fi
        fi

        # prevent simultaneous calls and calling from a location
        # that can cause the terminal to go away from under us.
        checkExclusivity $START_STOP_REQ
        if [[ "$hostSubType" != "virtualCloud" && "$hostSubType" != "virtualContainer" ]]; then
           checkLocation
        fi
        
        #If the /home partition is 100% the application should not come up.
        #This check will fail if the '/home' is more than 90% used.
        # NOTE(review): the 90% figure is not obviously related to the
        # '-p 20' argument passed below - confirm against the disk usage
        # script.
        # The disk usage script reports through the file named by the
        # exported preUpgradeCheckLogs; that file is grepped afterwards.
        export preUpgradeCheckLogs=$SONUS_TMP_DIR/sbxStartDiskUsage.log
        $TOUCH $preUpgradeCheckLogs
        $CHMOD 770 $preUpgradeCheckLogs

        $CHECK_DISK_USAGE_SH -p 20
        $GREP "Not enough available disk space" $preUpgradeCheckLogs &> /dev/null
        if [ $? == 0 ]; then
            $LOGGER -t $PROG "Not enough disk space is available. Exiting the $subSystem startup.."
            logEcho "Not enough disk space is available. Exiting the $subSystem startup.."
            rm -f $preUpgradeCheckLogs
            # NOTE(review): RETVAL is not changed here, so the script
            # exits with whatever RETVAL currently holds - confirm that
            # is intended for a refused start.
            exit
        fi
        rm -f $preUpgradeCheckLogs

        # Need to check if peer is configured with same dsbc personality before starting
        validateDsbcPersonality

        # Make sure drbd is in a consistent state
        # check drbd status, a return of 3 indicates not loaded
        if [[ "$hostSubType" != "virtualCloud" && "$haMode" == "1to1" && "$hostSubType" != "virtualContainer" ]]; then
            $SERVICE_SH drbd status > /dev/null 2>&1
            if [ $? -ne 3 ]; then  
                # reload drbd when the 'mirror' resource reports the
                # connection state "Unconfigured"
                drbdCstate=`$DRBDADM cstate mirror 2> /dev/null`
                if [[ "$drbdCstate" == "Unconfigured" ]]; then
                    $SERVICE_SH drbd reload > /dev/null 2>&1
                fi
            fi 
        fi

        # N:1 deployments refresh RGM_NODE_ID before starting
        if [[ "$haMode" == "Nto1" ]]; then
          updateNodeId
        fi
         
        # check bfd status, check if the .bfdpid marker file exist or not
        if [[ ! -e "$statusBfdMarkerFile" ]]; then
            $SYSTEMCTL start bfd
        fi

        # note: use retVal and not RETVAL here so that we don't return an
        # incorrect response due to the expected result of 'status' being 1
        $ASP status > /dev/null 2>&1
        retVal=$?
        if [ $retVal -eq 1 ]; then
           # Extract the value of the fipsMode setting: "fipsMode=" is 9
           # characters, so 'cut -c10-' keeps what follows it.  Note that
           # '$((' here opens a command substitution containing a
           # subshell, not an arithmetic expansion.
           temp=$(($GREP "fipsMode" $SBXCONF_FILE 2>/dev/null)|$CUT -c10-)
           if [ "$temp" == "enabled" ]
           then
               # awk prints the number of the first line containing
               # 'criticalerror'; "1" means the first line of the state file
               if [ "$($AWK '/criticalerror/ { print NR; exit }' /var/lib/fips/state 2>/dev/null)" == "1" ]
               then
                   $ECHO "In FIPS-140-3 critical error state. SBX cannot be started. May require reboot to clear the critical error state." 
                   $HEAD -5 /var/lib/fips/state
                   exit 1
                fi
           fi
           # Check whether watchdog process is running. If yes, then exit the script and do not start SBC service.
           watchdog_pid=`$PS -ef | $GREP $ASPWATCHDOG | $GREP -v grep | $AWK '{print $2}' | $PASTE -s -d" \n"`
           if [ "$watchdog_pid" != "" ]; then
              logEcho "The service is already starting up or going down as safplus_watchdog is currently running."
              exit 1
           else
              start
           fi
        else
           logEcho "The service is already running"
        fi
    ;;

    stop)
        #  see if stopping due to OS shutdown/restart. shutting down is true
        # for both poweroff and restart.
        osShutdown=$(isSystemShuttingDown)

        # prevent simultaneous calls and calling from a location
        # that can cause the terminal to go away from under us.
        # also prevent cleaning up if the diags are running.
        # NOTE: If the OS is going down, don't bother with the checks
        # since we are unable to stop the shutdown/reboot process
        # anyway.  We would have had to stop it prior to shutdown/reboot
        # being executed.  Just blast the service away, potentially closing
        # someones terminal session if they are in the drbd mount point.
        if [ $osShutdown -eq 0 ]; then
          checkDiags
          checkExclusivity $START_STOP_REQ
          if [[ "$hostSubType" != "virtualCloud" && "$hostSubType" != "virtualContainer" ]]; then
             checkLocation
          fi
          stop
        else
          zap
        fi
    ;;
    swe_crash)
        # only to be called by monit when SWe_NP or SWe_UXPAD crashes.
        # We quickly switch over the media and then call for a reboot.
        # It is not handled by just calling reboot/shutdown because the normal
        # ordering takes too long and we might tear down the calls
        # before the shutdown call executes the sbx init.d script.
        # NOTE: Since we need to bring the OS down, don't bother
        # with the checks.
        # NOTE: the reboot call will cause another call to stop the
        # sbx service, but there is no harm there.
        # $2 is the name of the crashed process; it is required.
        if [ "$#" -eq 2 ]; then
            process=$2
            # despite its name, 'pid' holds the number of ps lines that
            # mention PrsProcess (grep -c), not a process id
            pid=`$PS -ef | $GREP  PrsProcess | $GREP -vc  grep`
            if [ $pid -eq 1 ]; then
                $LOGGER -t `$BASENAME $0` "Killing Prs Process due to $process failure.... "
                $DISABLE_SAFPLUS_RESTART_SH
                $ECHO 1 > $PRS_CPS_REBOOT
                pidPrs=`$PS -ef | $GREP  PrsProcess | $GREP -v  grep| $AWK '{print $2}'`
                kill -9 $pidPrs
                if [ "$haMode" == "Nto1" ]; then
                    # Generate down event before sending switchover to avoid any serviceId collisions
                    generateDownEvent
                    # Send switchover event to peer if running in 'active' role
                    checkAndSendSwitchoverEvent
                fi

                # Reboot only when PRS process is running otherwise system will continue
                # to reboot if SWe_NP fails to come up on first bootup.
                if [[ "$hostSubType" != "virtualContainer" ]]; then
                    $LOGGER -t `$BASENAME $0` "Rebooting system due to swe crash.... "
                    $SHUTDOWN_SH -t 0
                fi
            fi
            # stop monit from monitoring the crashed process
            $MONIT unmonitor $process
        else
            $LOGGER -t $PROG "$MONIT unmonitor failed as no process name provided"
        fi
    ;;

    restart)
        # prevent starting if in FIPS critical error state
        checkFipsCriticalError

        # prevent starting up while diags are running
        checkDiags

        # prevent simultaneous calls and calling from a location
        # that can cause the terminal to go away from under us.
        checkExclusivity $START_STOP_REQ
        if [[ "$hostSubType" != "virtualCloud" && "$hostSubType" != "virtualContainer" ]]; then
           checkLocation
        fi
        restart
	;;

    status)
        # check for exclusive script access. this is needed since
        # there is a window whereby on a 'start' no processes have
        # been started yet and the status command will report stopped,
        # even though the request has already been made to start the
        # processes.
        # checkExclusivity returns 1 when a start/stop is in progress.
        checkExclusivity $STATUS_REQ
        stateChangeUnderway=$?

        # set some flags to help decide if we should run safplus_console
        procStopped=0;
        procStarted=0;

        # check for the ASP's watchdog, which is a python program.
        # the watchdog sometimes spawns another, so we could get two
        # entries.  use 'paste' so the output is pretty-printed.
        pid=`$PS -ef | $GREP $ASPWATCHDOG | $GREP -v grep | $AWK '{print $2}' | $PASTE -s -d" \n"`
        if [ "$pid" != "" ]; then
          procStarted=1;
          $ECHO "$ASPWATCHDOG_NAME (pid $pid) is running..."
        else
          procStopped=1;
          $ECHO "$ASPWATCHDOG_NAME is stopped"
        fi

        # setup the process list in order of startup
        procs="$ASPAMF"
        for f in $INSTALLED_COMPS; do
            procs="$procs ${MODEL_PROC_PREFIX}${f}"
        done

        # run the status command for each process
        for f in $procs; do
            status $f
            if [ $? -ne 0 ]; then
              procStopped=1
            else
              procStarted=1
            fi
        done

        # if every process is running, check if the roles have been fully
        # assigned. if not, we are not really ready yet.  We can use the
        # status of the last process checked as a rough guide to see if
        # everyone is running, since it is last to start and first to
        # terminate
        # NOTE: if doing a delayed model update, the new components (if any)
        # will be stopped until the update is performed.  in that case
        # we still want to check the status, since we will come up fully.
        $GREP -q -e skipUpdate -e updateModelOnSwitchover $dynamicHARequiredFile 2> /dev/null
        modelUpdRes=$?
        if [[ $procStopped -eq 0 || $modelUpdRes -eq 0 ]]; then
            # Get the current role from safplus_console
            getCurrentRole

            if [ "$currentRole" != "" ]; then
               $ECHO -e "\n\t** Service running [$currentRole] **\n"
            else
               # For backward compatibility for any scripts testing the
               # return value from service sbx status. The status function
               # returned 3 when the function detected services stopped.
               $ECHO -e "\n\t** Service not ready **\n"
               RETVAL=3
            fi
        else
            # For backward compatibility for any scripts testing the
            # return value from service sbx status. The status function
            # returned 3 when the function detected services stopped.
            if [ $procStarted -eq 0 ]; then
               if [ $stateChangeUnderway -eq 1 ]; then
                  # service start/stop was issued and is in progress
                  $ECHO -e "\n\t** Service not ready due to start/stop request **\n"
               else
                  $ECHO -e "\n\t** Service not running **\n"
               fi
            else
              $ECHO -e "\n\t** Service not ready **\n"
            fi
            RETVAL=3
        fi
	;;

    reload)
    # honor init.d script semantics:
    # no /etc/default/sbx config file so do nothing
    ;;

    force-reload)
    # honor init.d script semantics:
    # no /etc/default/sbx config file so do nothing
    ;;

    *)
        logEcho "Usage: $PROG {start|stop|status|restart|reload|force-reload|swe_crash <process_name>}"
        RETVAL=1
    ;;
esac


# NOTE: exit is trapped and cleanExit is called instead with the value
# of RETVAL passed to it.  This allows cleanExit to perform cleanup and then
# exit the script with the proper return value.
exit
