Skip to content

Commit

Permalink
Merge pull request #261 from fmherschel/angi-devel
Browse files Browse the repository at this point in the history
Angi devel with fencing alert agent
  • Loading branch information
fmherschel authored Jul 8, 2024
2 parents 6c0fe72 + 883bf90 commit 87a5781
Show file tree
Hide file tree
Showing 79 changed files with 1,114 additions and 249 deletions.
1 change: 1 addition & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

FILE_LIST = LICENSE \
README.md \
alert \
crm_cfg \
icons \
man \
Expand Down
4 changes: 4 additions & 0 deletions SAPHanaSR-angi.spec
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,9 @@ install -m 0644 srHook/susCostOpt.py %{buildroot}/usr/share/%{name}/
install -m 0644 srHook/susChkSrv.py %{buildroot}/usr/share/%{name}/
install -m 0444 srHook/global.ini_* %{buildroot}/usr/share/%{name}/samples

# alert manager
install -m 0755 alert/SAPHanaSR-alert-fencing %{buildroot}/usr/bin

# crm config templates
install -m 0644 crm_cfg/angi-ScaleUp/[0-9]*_* %{buildroot}/usr/share/%{name}/samples/crm_cfg/angi-ScaleUp

Expand Down Expand Up @@ -134,6 +137,7 @@ install -m 0444 tools/saphana_sr_tools.py %{buildroot}/usr/lib/%{name}
/usr/bin/SAPHanaSR-filter-legacy
/usr/bin/SAPHanaSR-hookHelper
/usr/bin/SAPHanaSR-manageProvider
/usr/bin/SAPHanaSR-alert-fencing

%license LICENSE
%dir %{_docdir}/%{name}
Expand Down
4 changes: 2 additions & 2 deletions SAPHanaSR-tester.spec
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ License: GPL-2.0
Group: Productivity/Clustering/HA
AutoReqProv: on
Summary: Test suite for SAPHanaSR clusters
Version: 1.2.13
Version: 1.2.14
Release: 0
Url: https://www.suse.com/c/fail-safe-operation-of-sap-hana-suse-extends-its-high-availability-solution/

Expand Down Expand Up @@ -80,7 +80,7 @@ install -m 0644 test/saphana_sr_test.py %{buildroot}/usr/lib/%{name}
install -m 0755 test/cs_* %{buildroot}/usr/bin
install -m 0755 test/callTest* %{buildroot}/usr/bin
install -m 0755 test/loopTests* %{buildroot}/usr/bin
install -m 0755 test/sct_* %{buildroot}/usr/bin
install -m 0755 test/bin/sct_* %{buildroot}/usr/bin

# client files
install -m 0755 tools/SAPHanaSR-showAttr %{buildroot}/usr/bin
Expand Down
102 changes: 102 additions & 0 deletions alert/SAPHanaSR-alert-fencing
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
#!/bin/bash
#
# SAPHanaSR-alert-fencing
# Author: Lars Pinne Fabian Herschel, June 2024
# Support: [email protected]
# License: GNU General Public License (GPL)
# Copyright: (c) 2024 SUSE LLC
# Version: 2024-06-18-15:33
#
# crm configure alert nodes-1 "/usr/bin/SAPHanaSR-alert-fencing" select nodes
# crm configure alert fencing-1 "/usr/bin/SAPHanaSR-alert-fencing" select fencing attributes alert_uptime_threshold=300
#

# Syslog settings: every message of this agent is written via $logger with
# the tag $logger_tag.
logger_tag="SAPHanaSR-alert-fencing"
logger="/usr/bin/logger"

# ON_FAIL_ACTION="${OCF_RESKEY_ON_FAIL_ACTION:-proceed}"
# Alert context taken from the CRM_alert_* environment variables. The
# defaults below apply when a variable is unset or empty, e.g. when the
# script is called by hand.
CRM_alert_recipient="${CRM_alert_recipient:-/dev/null}"
crm_alert_kind="${CRM_alert_kind:-manual call}"
crm_alert_node="${CRM_alert_node:-$HOSTNAME}"
crm_alert_desc="${CRM_alert_desc:-no description provided}"

# Site cache, one "host:site_name" entry per line.
cache_file="/run/crm/SAPHanaSR_site_cache"

# Minimum system uptime (seconds) compared against sys_uptime before a
# fencing alert is acted upon; defaults to 300 when not provided.
alert_uptime_threshold="${alert_uptime_threshold:-300}"

# /proc/uptime starts with "<seconds>.<fraction>"; splitting on '.' keeps
# only the integer seconds, the remainder of the line is discarded.
IFS=. read -r sys_uptime _ </proc/uptime

# Log the begin marker once, including the values used for the decision.
$logger -t "$logger_tag" "AH: begin event '$crm_alert_kind' (uptime=$sys_uptime, alert_uptime_threshold=$alert_uptime_threshold)"


function process_fencing()
{
    # Handle a 'fencing' alert: fence the local node when it belongs to the
    # same site as the node named in the alert.
    #
    # Globals read: sys_uptime, alert_uptime_threshold, cache_file,
    #               crm_alert_node, HOSTNAME, logger, logger_tag
    # The cache file has one "host:site_name" entry per line.
    # Every decision is written to syslog with a "DEC:" prefix.
    local fenced_site_name local_site_name rc
    local uptime="$sys_uptime"
    local threshold="$alert_uptime_threshold"

    # [[ a -ge b ]] evaluates both operands as arithmetic expressions, so a
    # value like "08" is rejected as invalid octal and a word like "abc" is
    # silently read as 0. Accept plain decimal digits only.
    if [[ ! "$uptime" =~ ^[0-9]+$ ]]; then
        # unknown uptime counts as 0, as an empty value did before
        uptime=0
    fi
    if [[ ! "$threshold" =~ ^[0-9]+$ ]]; then
        $logger -t "$logger_tag" "WARN: invalid alert_uptime_threshold '$threshold', using 300"
        threshold=300
    fi

    # 10# forces base 10, leading zeros must not switch to octal
    if (( 10#$uptime < 10#$threshold )); then
        $logger -t "$logger_tag" "DEC: NO FENCE (uptime < alert_uptime_threshold)"
        return
    fi

    if [[ ! -e "$cache_file" ]]; then
        $logger -t "$logger_tag" "DEC: NO FENCE (no cache)"
        return
    fi

    # figure out the site of the fenced node and of the local node
    fenced_site_name=$(awk -F: '$1 == host { print $2 }' host="${crm_alert_node}" "$cache_file")
    local_site_name=$(awk -F: '$1 == host { print $2 }' host="${HOSTNAME}" "$cache_file")
    $logger -t "$logger_tag" "INFO: cache_file=$cache_file, crm_alert_node=$crm_alert_node"
    $logger -t "$logger_tag" "INFO: fenced_site_name=$fenced_site_name, local_site_name=$local_site_name"

    # an unknown local site never matches, even if the fenced site is unknown too
    if [[ -z "$local_site_name" || "$fenced_site_name" != "$local_site_name" ]]; then
        $logger -t "$logger_tag" "DEC: NO FENCE ($fenced_site_name != $local_site_name)"
        return
    fi

    $logger -t "$logger_tag" "DEC: FENCE ($fenced_site_name == $local_site_name)"
    # NOTE(review): the reason for the 10 second delay is not visible here - confirm
    sleep 10
    /usr/sbin/crm --force node fence "${HOSTNAME}"
    rc="$?"
    # only a failed fence request is logged
    if [[ "$rc" != "0" ]]; then
        $logger -t "$logger_tag" "ACT: /usr/sbin/crm --force node fence ${HOSTNAME}; rc=$rc"
    fi
}

function check_fencing()
{
    # Handle a 'check' alert: look up the site of the node named in the alert
    # and of the local node, then log the resulting fencing decision.
    # No fencing is triggered here and the system uptime is not considered.
    #
    # Globals read: cache_file, crm_alert_node, HOSTNAME, logger, logger_tag
    # The cache file has one "host:site_name" entry per line.
    local fenced_site_name local_site_name

    if [[ ! -e "$cache_file" ]]; then
        $logger -t "$logger_tag" "DEC: NO FENCE (no cache)"
        return
    fi

    # figure out the site of the fenced node and of the local node
    fenced_site_name=$(awk -F: '$1 == host { print $2 }' host="${crm_alert_node}" "$cache_file")
    local_site_name=$(awk -F: '$1 == host { print $2 }' host="${HOSTNAME}" "$cache_file")
    $logger -t "$logger_tag" "INFO: cache_file=$cache_file"
    $logger -t "$logger_tag" "INFO: fenced_site_name=$fenced_site_name, local_site_name=$local_site_name"

    # an unknown local site never matches, even if the fenced site is unknown too
    if [[ -z "$local_site_name" || "$fenced_site_name" != "$local_site_name" ]]; then
        $logger -t "$logger_tag" "DEC: NO FENCE ($fenced_site_name != $local_site_name)"
        return
    fi

    $logger -t "$logger_tag" "DEC: FENCE ($fenced_site_name == $local_site_name)"
}

# Dispatch on the alert kind: pick the log message and, where one exists,
# the handler function. The message is logged before the handler runs.
alert_handler=""
case "$crm_alert_kind" in
    node|nodes)
        msg="Node '${crm_alert_node}' is now '${crm_alert_desc}'"
        ;;
    fencing)
        msg="Fencing for '${crm_alert_node}': ${crm_alert_desc}"
        alert_handler="process_fencing"
        ;;
    check)
        msg="Checking for '${crm_alert_node}': ${crm_alert_desc}"
        alert_handler="check_fencing"
        ;;
    *)
        msg="Unhandled '$crm_alert_kind' alert (${crm_alert_desc})"
        ;;
esac
$logger -t "$logger_tag" "INFO: $msg"
if [[ -n "$alert_handler" ]]; then
    "$alert_handler"
fi
# closing marker, written for every alert kind
$logger -t "$logger_tag" "AH: end event '$crm_alert_kind'"
#
5 changes: 3 additions & 2 deletions man-tester/SAPHanaSR-tester.7
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
.\" Version: 1.001
.\"
.TH SAPHanaSR-tester 7 "29 Feb 2024" "" "SAPHanaSR-angi"
.TH SAPHanaSR-tester 7 "04 Jul 2024" "" "SAPHanaSR-angi"
.\"
.SH NAME
SAPHanaSR-tester \- Functional testing for SAPHanaSR clusters.
Expand Down Expand Up @@ -248,8 +248,9 @@ Please report any other feedback and suggestions to [email protected].
.\"
.SH SEE ALSO
\fBSAPHanaSR-testCluster\fP(8) , \fBSAPHanaSR-tests-syntax\fP(5) ,
\fBSAPHanaSR-tests-description\fP(7) ,
\fBSAPHanaSR-tests-description\fP(7) ,
\fBSAPHanaSR-tests-angi-ScaleUp\fP(7) , \fBSAPHanaSR-tests-angi-ScaleOut\fP(7) ,
\fBSAPHanaSR-tests-classic-ScaleUp\fP(7) ,
\fBSAPHanaSR-angi\fP(7) , \fBSAPHanaSR-showAttr\fP(8) , \fBcrm_mon\fP(8) ,
\fBssh-keygen\fP(1) , \fBssh-copy-id\fP(1) ,
.br
Expand Down
14 changes: 10 additions & 4 deletions man-tester/SAPHanaSR-tests-angi-ScaleOut.7
Original file line number Diff line number Diff line change
Expand Up @@ -47,10 +47,10 @@ Kill primary master indexserver, for susChkSrv.py.
Kill primary master instance.
.TP
\fBkill_prim_node\fP
Kill primary master node.
Kill primary master node (no fencing alert agent configured).
.TP
\fBkill_prim_site\fP
Kill secondary site nodes. Not yet implemented.
\fBkill_prim_node_fencing_alert\fP
Kill primary master node (fencing alert agent configured).
.TP
\fBkill_prim_worker_indexserver\fP
Kill primary worker indexserver, for susChkSrv.py.
Expand All @@ -59,7 +59,13 @@ Kill primary worker indexserver, for susChkSrv.py.
Kill primary worker instance.
.TP
\fBkill_prim_worker_node\fP
Kill primary worker node.
Kill primary worker node (no fencing alert agent configured).
.TP
\fBkill_prim_worker_node_fencing_alert\fP
Kill primary worker node (fencing alert agent configured).
.TP
\fBkill_prim_site\fP
Kill primary site nodes. Not yet implemented.
.TP
\fBkill_secn_indexserver\fP
Kill secondary master indexserver, for susChkSrv.py.
Expand Down
119 changes: 119 additions & 0 deletions man-tester/SAPHanaSR-tests-angi-ScaleOut_on-fail-fence.7
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
.\" Version: 1.001
.\"
.TH SAPHanaSR-tests-angi-ScaleOut_on-fail-fence 7 "06 Jul 2024" "" "SAPHanaSR-angi"
.\"
.SH NAME
SAPHanaSR-tests-angi-ScaleOut_on-fail-fence \- Functional tests for SAPHanaSR Scale-Out with immediate fencing.
.PP
.\"
.SH DESCRIPTION
.PP
Functional tests are shipped for the scale-out ERP scenario when configured for
immediate fencing. See manual page ocf_suse_SAPHanaController(7), susChkSrv.py(7)
and SAPHanaSR-alert-fencing(8) for the respective configuration details. These
tests can be run out-of-the-box. The test cases are defined in dedicated files.
See manual page SAPHanaSR-tests-syntax(5) for syntax details. Details like
performed steps or expected behaviour of cluster and HANA are explained in
SAPHanaSR-tests-description(7).

Each test can be executed by running the command SAPHanaSR-testCluster with
appropriate parameters. See manual page SAPHanaSR-testCluster(8).
.PP
Predefined functional tests specific for scale-out ERP with immediate fencing overview:
.TP
\fBfreeze_prim_master_nfs_fencing_alert\fP
Freeze HANA NFS on primary master node.
.TP
\fBkill_prim_indexserver_on_fail_fence\fP
Kill primary master indexserver, for susChkSrv.py.
.TP
\fBkill_prim_inst\fP
Kill primary master instance.
.TP
\fBkill_prim_node_fencing_alert\fP
Kill primary master node (fencing alert agent configured).
.TP
\fBkill_prim_worker_indexserver_on_fail_fence\fP
Kill primary worker indexserver, for susChkSrv.py.
.TP
\fBkill_prim_worker_inst\fP
Kill primary worker instance.
.TP
\fBkill_prim_worker_node\fP
Kill primary worker node (no fencing alert agent configured).
.TP
\fBkill_prim_worker_node_fencing_alert\fP
Kill primary worker node (fencing alert agent configured).
.TP
\fBkill_secn_indexserver\fP
Kill secondary master indexserver, for susChkSrv.py.
.TP
\fBkill_secn_inst\fP
Kill secondary master instance.
.TP
\fBkill_secn_node\fP
Kill secondary master node.
.TP
\fBkill_secn_worker_inst\fP
Kill secondary worker instance.
.TP
\fBkill_secn_worker_node\fP
Kill secondary worker node.
.TP
\fBkill_secn_site\fP
Kill secondary site nodes. Not yet implemented.
.PP
.\"
.SH EXAMPLES
.PP
* List tests for SAPHanaSR-angi scale-out ERP with immediate fencing scenarios
.PP
.RS 2
# ls /usr/share/SAPHanaSR-tester/json/angi-ScaleOut/*fenc*
.RE
.PP
.\"
.SH FILES
.TP
/usr/share/SAPHanaSR-tester/json/angi-ScaleOut/
functional tests for SAPHanaSR-angi scale-out ERP scenarios.
.TP
/usr/bin/sct_test_*
shell scripts for un-easy tasks on the cluster nodes.
.PP
.\"
.SH REQUIREMENTS
.PP
See the REQUIREMENTS section in SAPHanaSR-tester(7) and SAPHanaSR-angi(7).
See also ocf_suse_SAPHanaController(7), susChkSrv.py(7) and
SAPHanaSR-alert-fencing(8) for configuration.
Of course, HANA database and Linux cluster have certain requirements.
Please refer to the product documentation.
.\"
.SH BUGS
In case of any problem, please use your favourite SAP support process to open
a request for the component BC-OP-LNX-SUSE.
Please report any other feedback and suggestions to [email protected].
.PP
.\"
.SH SEE ALSO
\fBSAPHanaSR-tester\fP(7) , \fBSAPHanaSR-testCluster\fP(8) ,
\fBSAPHanaSR-tests-description\fP(7) , \fBSAPHanaSR-tests-syntax\fP(5) ,
\fBSAPHanaSR-tests-angi-ScaleOut\fP(7) ,
\fBSAPHanaSR-angi\fP(7) , \fBSAPHanaSR-showAttr\fP(8) ,
\fBocf_suse_SAPHanaController\fP(7) , \fBsusChkSrv.py\fP(7) ,
\fBSAPHanaSR-alert-fencing\fP(8)
.PP
.\"
.SH AUTHORS
F.Herschel, L.Pinne.
.PP
.\"
.SH COPYRIGHT
(c) 2024 SUSE LLC
.br
The package SAPHanaSR-tester comes with ABSOLUTELY NO WARRANTY.
.br
For details see the GNU General Public License at
http://www.gnu.org/licenses/gpl.html
.\"
Loading

0 comments on commit 87a5781

Please sign in to comment.