From 5268cc2b82803ba8c5d2ded11b37353a6817e1da Mon Sep 17 00:00:00 2001
From: lpinne
Date: Wed, 8 May 2024 13:40:22 +0200
Subject: [PATCH 01/84] SAPHanaSR_maintenance_examples.7: examples

---
 man/SAPHanaSR_maintenance_examples.7 | 26 +++++++++++++++++++++++---
 1 file changed, 23 insertions(+), 3 deletions(-)

diff --git a/man/SAPHanaSR_maintenance_examples.7 b/man/SAPHanaSR_maintenance_examples.7
index 5db66fc2..b6c630fb 100644
--- a/man/SAPHanaSR_maintenance_examples.7
+++ b/man/SAPHanaSR_maintenance_examples.7
@@ -1,6 +1,6 @@
 .\" Version: 1.001
 .\"
-.TH SAPHanaSR_maintenance_examples 7 "25 Jan 2024" "" "SAPHanaSR"
+.TH SAPHanaSR_maintenance_examples 7 "08 May 2024" "" "SAPHanaSR"
 .\"
 .SH NAME
 SAPHanaSR_maintenance_examples \- maintenance examples for SAPHanaController.
@@ -297,12 +297,32 @@ This is an advanced task.
 10. Please bring back the other node and register that HANA as soon as possible. If the HANA primary stays alone for too long, the log area will fill up.
 .RE
 .PP
+\fB*\fR Start Linux cluster after node has been fenced.
+
+It is recommended to not configure the Linux cluster for always starting
+automatically on boot. Better is to start automatically only if cluster and/or
+node have been stopped cleanly. If the node has been rebooted by STONITH, the
+cluster should not start automatically. If the cluster is configured that way,
+some steps are needed to start the cluster after a node has been rebooted by
+STONITH. STONITH via SBD is used in this example.
+.PP
+.RS 2
+# cs_clear_sbd_devices --all
+.br
+# cs_show_sbd_devices
+.br
+# crm cluster start
+.br
+# crm_mon -r
+.RE
+.PP
 .\"
 \fB*\fR Overview on maintenance procedure for Linux, HANA remains running, on pacemaker-2.0.
 
 It is necessary to wait for each step to complete and to check the result. It
-also is necessary to test and document the whole procedure before applying in production.
-See also section REQUIREMENTS below and example on checking status of HANA and cluster above.
+also is necessary to test and document the whole procedure before applying in
+production. See also section REQUIREMENTS below and example on checking status
+of HANA and cluster above.
 .\" TODO details
 .PP
 .RS 2

From f0efb664aacd4da01c06885e4c9b8321aa239e56 Mon Sep 17 00:00:00 2001
From: Fabian Herschel
Date: Fri, 24 May 2024 12:11:05 +0200
Subject: [PATCH 02/84] angi: SAPHanaSR-upgrade-to-angi-demo: zypper se -
 filter on packages only

---
 tools/SAPHanaSR-upgrade-to-angi-demo | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/SAPHanaSR-upgrade-to-angi-demo b/tools/SAPHanaSR-upgrade-to-angi-demo
index e6c4aec0..3d150c3a 100755
--- a/tools/SAPHanaSR-upgrade-to-angi-demo
+++ b/tools/SAPHanaSR-upgrade-to-angi-demo
@@ -568,7 +568,7 @@ function f_check-prereq() {
         echo "ERROR: Package SAPHanaSR-tester-client installed."
         pre_rc=9
     fi
-    rmt=$(zypper se $RPMNEW 2>/dev/null | grep -c $RPMNEW)
+    rmt=$(zypper se -t package $RPMNEW 2>/dev/null | grep -c $RPMNEW)
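    # [editorial annotation, not part of the commit] A sketch of why the
    # '-t package' filter matters: plain 'zypper se $RPMNEW' may also list
    # patterns, source packages or other entries whose names merely contain
    # the search term, so the 'grep -c' count below can exceed 1 even when
    # exactly one installable package exists; restricting the search to real
    # packages keeps the '!= 1' test reliable.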
    if [ $rmt != 1 ]; then
        echo "ERROR: Can not find $RPMNEW in software channels."
        pre_rc=9

From 8d65f2e44265f73d552f98af69352b0dc3e4ce2a Mon Sep 17 00:00:00 2001
From: Fabian Herschel
Date: Fri, 24 May 2024 15:06:44 +0200
Subject: [PATCH 03/84] angi: added sample properties file for
 SAPHanaSR-showAttr

---
 tools/SAPHanaSR-showAttr.properties.demo.json | 10 ++++++++++
 1 file changed, 10 insertions(+)
 create mode 100644 tools/SAPHanaSR-showAttr.properties.demo.json

diff --git a/tools/SAPHanaSR-showAttr.properties.demo.json b/tools/SAPHanaSR-showAttr.properties.demo.json
new file mode 100644
index 00000000..313166fd
--- /dev/null
+++ b/tools/SAPHanaSR-showAttr.properties.demo.json
@@ -0,0 +1,10 @@
+{
+"selections": {
+    "demo": {
+        "global": [],
+        "resource": [],
+        "site": [".*"],
+        "host": ["Host", "clone_state", "roles", "score", "site", "sra", "srah"]
+    }
+  }
+}

From a540b3d07bd0d9f21ae111e307a81a5d84065593 Mon Sep 17 00:00:00 2001
From: lpinne
Date: Mon, 3 Jun 2024 11:24:28 +0200
Subject: [PATCH 04/84] SAPHanaSR_upgrade_to_angi.7: small typos

---
 man/SAPHanaSR_upgrade_to_angi.7 | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/man/SAPHanaSR_upgrade_to_angi.7 b/man/SAPHanaSR_upgrade_to_angi.7
index 007d763d..2cb27f2f 100644
--- a/man/SAPHanaSR_upgrade_to_angi.7
+++ b/man/SAPHanaSR_upgrade_to_angi.7
@@ -1,6 +1,6 @@
 .\" Version: 1.001
 .\"
-.TH SAPHanaSR_upgrade_to_angi 7 "02 Apr 2024" "" "SAPHanaSR"
+.TH SAPHanaSR_upgrade_to_angi 7 "02 Jun 2024" "" "SAPHanaSR"
 .\"
 .SH NAME
 SAPHanaSR_upgrade_to_angi \- How to upgrade from SAPHanaSR or SAPHanaSR-ScaleOut to SAPHanaSR-angi.
@@ -16,7 +16,7 @@ fully backward compatible. Upgrading existing clusters is possible by following
 a defined procedure. The upgrade should lead to the same configuration as an
 installation from scratch.
 .PP
-The upgrade procedure depends on an initial setup as decribed in setup guides
+The upgrade procedure depends on an initial setup as described in setup guides
 and manual pages. See REQUIREMENTS below and in manual pages SAPHanaSR(7) or
 SAPHanaSR-ScaleOut(7). The procedure does not neccesarily need downtime for
 HANA, if planned and executed carefully. Nevertheless, it should be done under

From 76be83f72a1a0dd9404b54bafb5884b0f67411bc Mon Sep 17 00:00:00 2001
From: lpinne
Date: Mon, 3 Jun 2024 16:40:18 +0200
Subject: [PATCH 05/84] SAPHanaSR-ScaleOut_basic_cluster.7 SAPHanaSR.7
 SAPHanaSR_basic_cluster.7 SAPHanaSR_maintenance_examples.7
 SAPHanaSR_upgrade_to_angi.7 ocf_suse_SAPHana.7: neccessary -> necessary

---
 man/SAPHanaSR-ScaleOut_basic_cluster.7 |  4 ++--
 man/SAPHanaSR.7                        |  2 +-
 man/SAPHanaSR_basic_cluster.7          |  4 ++--
 man/SAPHanaSR_maintenance_examples.7   | 10 +++++-----
 man/SAPHanaSR_upgrade_to_angi.7        |  2 +-
 man/ocf_suse_SAPHana.7                 |  4 ++--
 6 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/man/SAPHanaSR-ScaleOut_basic_cluster.7 b/man/SAPHanaSR-ScaleOut_basic_cluster.7
index 352f809a..50ed40ba 100644
--- a/man/SAPHanaSR-ScaleOut_basic_cluster.7
+++ b/man/SAPHanaSR-ScaleOut_basic_cluster.7
@@ -9,7 +9,7 @@ SAPHanaSR-ScaleOut_basic_cluster \- SAP HANA System Replication scale-out basic
 .SH DESCRIPTION
 .\"
 The SAP HANA System Replication scale-out scenario needs a certain basic
-cluster configuration. Besides this necessary settings, some additional
+cluster configuration. Besides this neccessary settings, some additional
 configurations might match specific needs. Adapting a few SAP HANA settings
 might be beneficial as well.
 .\"
@@ -234,7 +234,7 @@ op_defaults op-options: \\

 \fB* crm SBD stonith configuration\fR

-To complete the SBD setup, it is necessary to activate SBD as STONITH/fencing
+To complete the SBD setup, it is neccessary to activate SBD as STONITH/fencing
 mechanism in the CIB. The SBD is normally used for SAPHanaSR-ScaleOut instead
 of any other fencing/stonith mechanism. Example for a basic disk-based SBD
 resource:
diff --git a/man/SAPHanaSR.7 b/man/SAPHanaSR.7
index 26e581fb..ef64e4bd 100644
--- a/man/SAPHanaSR.7
+++ b/man/SAPHanaSR.7
@@ -67,7 +67,7 @@ A primary absolutely must never be started, if the cluster does not know
 anything about the other site.
 On initial cluster start, the cluster needs to detect a valid HANA system
 replication setup, including system replication status (SOK) and last primary
-timestamp (LPT). This is necessary to ensure data integrity.
+timestamp (LPT). This is neccessary to ensure data integrity.
 .PP
 The rationale behind this is shown in the following scenario:
 .br
diff --git a/man/SAPHanaSR_basic_cluster.7 b/man/SAPHanaSR_basic_cluster.7
index 85ec5347..720ebb0f 100644
--- a/man/SAPHanaSR_basic_cluster.7
+++ b/man/SAPHanaSR_basic_cluster.7
@@ -9,7 +9,7 @@ SAPHanaSR_basic_cluster \- SAP HANA System Replication scale-up basic cluster co
 .SH DESCRIPTION
 .\"
 The SAP HANA System Replication scale-up scenario needs a certain basic
-cluster configuration. Besides this necessary settings, some additional
+cluster configuration. Besides this neccessary settings, some additional
 configurations might match specific needs. Adapting a few SAP HANA settings
 might be beneficial as well.
 .\"
@@ -189,7 +189,7 @@ op_defaults op-options: \\

 \fB* crm simple SBD stonith configuration\fR

-To complete the SBD setup, it is necessary to activate SBD as STONITH/fencing
+To complete the SBD setup, it is neccessary to activate SBD as STONITH/fencing
 mechanism in the CIB. The SBD is normally used for SAPHanaSR scale-up instead
 of any other fencing/stonith mechanism. Example for a basic disk-based SBD
 resource:
diff --git a/man/SAPHanaSR_maintenance_examples.7 b/man/SAPHanaSR_maintenance_examples.7
index b6c630fb..bb3d02f7 100644
--- a/man/SAPHanaSR_maintenance_examples.7
+++ b/man/SAPHanaSR_maintenance_examples.7
@@ -270,7 +270,7 @@ In this example, node is suse11 on the future secondary site to be registered. R
 .PP
 \fB*\fR Manually start the HANA primary if only one site is available.

-This might be necessary in case the cluster can not detect the status of both sites.
+This might be neccessary in case the cluster can not detect the status of both sites.
 This is an advanced task.
 .PP
 \fBBefore doing this, make sure HANA is not primary on the other site!\fR
@@ -319,8 +319,8 @@ STONITH. STONITH via SBD is used in this example.
 .\"
 \fB*\fR Overview on maintenance procedure for Linux, HANA remains running, on pacemaker-2.0.

-It is necessary to wait for each step to complete and to check the result. It
-also is necessary to test and document the whole procedure before applying in
+It is neccessary to wait for each step to complete and to check the result. It
+also is neccessary to test and document the whole procedure before applying in
 production. See also section REQUIREMENTS below and example on checking status
 of HANA and cluster above.
 .\" TODO details
@@ -381,8 +381,8 @@ HANA gets fully stopped.
 This procedure can be used to update HANA, OS or hardware. HANA roles and
 resource status remains unchanged.

-It is necessary to wait for each step to complete and to check the result.
-It also is necessary to test and document the whole procedure before applying in production.
+It is neccessary to wait for each step to complete and to check the result.
+It also is neccessary to test and document the whole procedure before applying in production.
 .PP
 .RS 2
 1. disabling pacemaker on HANA primary
diff --git a/man/SAPHanaSR_upgrade_to_angi.7 b/man/SAPHanaSR_upgrade_to_angi.7
index 2cb27f2f..230bcf17 100644
--- a/man/SAPHanaSR_upgrade_to_angi.7
+++ b/man/SAPHanaSR_upgrade_to_angi.7
@@ -132,7 +132,7 @@ TODO
 .PP
 The upgrade procedure consists of four phases: preparing, removing, adding,
 finalising. Linux cluster and HANA are kept running. However, resource
-management is disabled and the system goes thru fragiles states during the
+management is disabled and the system goes thru fragile states during the
 upgrade.
 .PP
 .RS 2
diff --git a/man/ocf_suse_SAPHana.7 b/man/ocf_suse_SAPHana.7
index bb003b63..7fff7639 100644
--- a/man/ocf_suse_SAPHana.7
+++ b/man/ocf_suse_SAPHana.7
@@ -293,7 +293,7 @@ system replication (e.g. PRD), managed by the SAPHana RA. The second SAPHana
 is a single test HANA (e.g. TST) running together with the productive HANA
 secondary on the same node. This second -single- HANA is managed as a primitive
 resource by the SAPInstance RA. Of course, a SAPHanaTopology clone resource is
-needed to make this work. It is also necessary to prepare an HANA HA/DR hook
+needed to make this work. It is also neccessary to prepare an HANA HA/DR hook
 script for adjusting the secondary HANA's memory in case of sr_takeover. See
 manual page susCostOpt.py(7) and URLs below. Finally, the SAPHana primary gets
 a priority to allow priority fencing. See manual page
@@ -379,7 +379,7 @@ Note: Older versions of the Linux cluster have used the commands 'migrate' and
 .PP
 * Manually start the HANA primary if only one node is available.

-This might be necessary in case the cluster could not detect the status of both nodes.
+This might be neccessary in case the cluster could not detect the status of both nodes.
 .PP
 .RS 2
 1. Start the cluster.

From e77b163922cb81c95ee7c733abc632199f4f21ee Mon Sep 17 00:00:00 2001
From: lpinne
Date: Wed, 5 Jun 2024 13:31:41 +0200
Subject: [PATCH 06/84] SAPHanaSR-ScaleOut.7 SAPHanaSR-ScaleOut_basic_cluster.7
 SAPHanaSR-upgrade-to-angi-demo.8 SAPHanaSR.7 SAPHanaSR_basic_cluster.7
 SAPHanaSR_maintenance_examples.7 SAPHanaSR_upgrade_to_angi.7
 ocf_suse_SAPHana.7 ocf_suse_SAPHanaController.7 susCostOpt.py.7: typos

---
 man/SAPHanaSR-ScaleOut.7               |  4 ++--
 man/SAPHanaSR-ScaleOut_basic_cluster.7 |  4 ++--
 man/SAPHanaSR-upgrade-to-angi-demo.8   |  6 +++---
 man/SAPHanaSR.7                        |  4 ++--
 man/SAPHanaSR_basic_cluster.7          |  4 ++--
 man/SAPHanaSR_maintenance_examples.7   | 12 ++++++------
 man/SAPHanaSR_upgrade_to_angi.7        |  2 +-
 man/ocf_suse_SAPHana.7                 |  4 ++--
 man/ocf_suse_SAPHanaController.7       |  2 +-
 man/susCostOpt.py.7                    |  4 ++--
 10 files changed, 23 insertions(+), 23 deletions(-)

diff --git a/man/SAPHanaSR-ScaleOut.7 b/man/SAPHanaSR-ScaleOut.7
index 99006ab5..27e43da6 100644
--- a/man/SAPHanaSR-ScaleOut.7
+++ b/man/SAPHanaSR-ScaleOut.7
@@ -73,7 +73,7 @@ A primary absolutely must never be started, if the cluster does not know
 anything about the other site.
 On initial cluster start, the cluster needs to detect a valid HANA system
 replication setup, including system replication status (SOK) and last primary
-timestamp (LPT). This is neccessary to ensure data integrity.
+timestamp (LPT). This is necessary to ensure data integrity.
 .PP
 The rationale behind this is shown in the following scenario:
 .br
@@ -235,7 +235,7 @@ leads to a Linux cluster in either one site or across three sites.
 .PP
 2. Technical users and groups such as sidadm should be defined locally in the
 Linux system. If users are resolved by remote service, local caching is
-neccessary. Substitute user (su) to sidadm needs to work reliably and without
+necessary. Substitute user (su) to sidadm needs to work reliably and without
 customized actions or messages. Supported shell is bash.
 .PP
 3. Strict time synchronization between the cluster nodes, e.g. NTP. All nodes of
diff --git a/man/SAPHanaSR-ScaleOut_basic_cluster.7 b/man/SAPHanaSR-ScaleOut_basic_cluster.7
index 50ed40ba..352f809a 100644
--- a/man/SAPHanaSR-ScaleOut_basic_cluster.7
+++ b/man/SAPHanaSR-ScaleOut_basic_cluster.7
@@ -9,7 +9,7 @@ SAPHanaSR-ScaleOut_basic_cluster \- SAP HANA System Replication scale-out basic
 .SH DESCRIPTION
 .\"
 The SAP HANA System Replication scale-out scenario needs a certain basic
-cluster configuration. Besides this neccessary settings, some additional
+cluster configuration. Besides this necessary settings, some additional
 configurations might match specific needs. Adapting a few SAP HANA settings
 might be beneficial as well.
 .\"
@@ -234,7 +234,7 @@ op_defaults op-options: \\

 \fB* crm SBD stonith configuration\fR

-To complete the SBD setup, it is neccessary to activate SBD as STONITH/fencing
+To complete the SBD setup, it is necessary to activate SBD as STONITH/fencing
 mechanism in the CIB. The SBD is normally used for SAPHanaSR-ScaleOut instead
 of any other fencing/stonith mechanism. Example for a basic disk-based SBD
 resource:
diff --git a/man/SAPHanaSR-upgrade-to-angi-demo.8 b/man/SAPHanaSR-upgrade-to-angi-demo.8
index 64fac6ce..ca9c72ca 100644
--- a/man/SAPHanaSR-upgrade-to-angi-demo.8
+++ b/man/SAPHanaSR-upgrade-to-angi-demo.8
@@ -147,7 +147,7 @@ performing an upgrade:
 The script needs to be copied to all cluster nodes upfront. It should be called
 on the HANA primary node. Before doing this, you should check and prepare
 pre-requisites, see example above. The proposed commands need to be checked.
-Sometimes adaptions are neccessary.
+Sometimes adaptions are necessary.
 See also SAPHanaSR_upgrade_to_angi(7).
 .PP
 .RS 2
@@ -161,7 +161,7 @@ Before doing this, you should check and prepare pre-requisites, see example
 above. The runbook draft is stored as file "SAPHanaSR-upgrade-draft.txt".
 This draft can be used for preparing details for the upgrade procedure.
 The proposed commands need to be checked. Sometimes adaptions are
-neccessary. Of course the result needs to be checked finally as well.
+necessary. Of course the result needs to be checked finally as well.
 See also SAPHanaSR_upgrade_to_angi(7) and tee(1).
 .PP
 .RS 2
@@ -200,7 +200,7 @@ performing the removal of SAPHanaSR:
 The script needs to be copied to all cluster nodes beforehand. It should be
 called on the HANA primary node. Before doing this, you should check and prepare
 pre-requisites, see example above. The proposed commands need to be checked.
-Sometimes adaptions are neccessary. Of course the result needs to be checked
+Sometimes adaptions are necessary. Of course the result needs to be checked
 finally as well. See also SAPHanaSR_upgrade_to_angi(7).
 .PP
 .RS 2
diff --git a/man/SAPHanaSR.7 b/man/SAPHanaSR.7
index ef64e4bd..68b6103e 100644
--- a/man/SAPHanaSR.7
+++ b/man/SAPHanaSR.7
@@ -67,7 +67,7 @@ A primary absolutely must never be started, if the cluster does not know
 anything about the other site.
 On initial cluster start, the cluster needs to detect a valid HANA system
 replication setup, including system replication status (SOK) and last primary
-timestamp (LPT). This is neccessary to ensure data integrity.
+timestamp (LPT). This is necessary to ensure data integrity.
 .PP
 The rationale behind this is shown in the following scenario:
 .br
@@ -193,7 +193,7 @@ best practices.
 .PP
 2. Technical users and groups such as sidadm are defined locally in the Linux
 system. If users are resolved by remote service, local caching is
-neccessary. Substitute user (su) to sidadm needs to work reliably and without
+necessary. Substitute user (su) to sidadm needs to work reliably and without
 customized actions or messages. Supported shell is bash.
 .PP
 3. Strict time synchronization between the cluster nodes, e.g. NTP. All nodes of
diff --git a/man/SAPHanaSR_basic_cluster.7 b/man/SAPHanaSR_basic_cluster.7
index 720ebb0f..85ec5347 100644
--- a/man/SAPHanaSR_basic_cluster.7
+++ b/man/SAPHanaSR_basic_cluster.7
@@ -9,7 +9,7 @@ SAPHanaSR_basic_cluster \- SAP HANA System Replication scale-up basic cluster co
 .SH DESCRIPTION
 .\"
 The SAP HANA System Replication scale-up scenario needs a certain basic
-cluster configuration. Besides this neccessary settings, some additional
+cluster configuration. Besides this necessary settings, some additional
 configurations might match specific needs. Adapting a few SAP HANA settings
 might be beneficial as well.
 .\"
@@ -189,7 +189,7 @@ op_defaults op-options: \\

 \fB* crm simple SBD stonith configuration\fR

-To complete the SBD setup, it is neccessary to activate SBD as STONITH/fencing
+To complete the SBD setup, it is necessary to activate SBD as STONITH/fencing
 mechanism in the CIB. The SBD is normally used for SAPHanaSR scale-up instead
 of any other fencing/stonith mechanism. Example for a basic disk-based SBD
 resource:
diff --git a/man/SAPHanaSR_maintenance_examples.7 b/man/SAPHanaSR_maintenance_examples.7
index bb3d02f7..f653efaa 100644
--- a/man/SAPHanaSR_maintenance_examples.7
+++ b/man/SAPHanaSR_maintenance_examples.7
@@ -47,7 +47,7 @@ This might be convenient when performing administrative actions or cluster tests
 \fB*\fR Overview on stopping the HANA database at one site.

 This procedure does work for scale-up and scale-out. No takeover will be done. This procedure
-should be used, when it is neccessary to stop the HANA database. Stopping the HANA database
+should be used, when it is necessary to stop the HANA database. Stopping the HANA database
 should not be done by just stopping the Linux cluster or shutting down the OS. This
 particularly applies to scale-out systems. It might be good to define upfront which HANA site
 needs to be stopped. In case both sites need to be stopped, it might be good to define the order. First
@@ -270,7 +270,7 @@ In this example, node is suse11 on the future secondary site to be registered. R
 .PP
 \fB*\fR Manually start the HANA primary if only one site is available.

-This might be neccessary in case the cluster can not detect the status of both sites.
+This might be necessary in case the cluster can not detect the status of both sites.
 This is an advanced task.
 .PP
 \fBBefore doing this, make sure HANA is not primary on the other site!\fR
@@ -319,8 +319,8 @@ STONITH. STONITH via SBD is used in this example.
 .\"
 \fB*\fR Overview on maintenance procedure for Linux, HANA remains running, on pacemaker-2.0.

-It is neccessary to wait for each step to complete and to check the result. It
-also is neccessary to test and document the whole procedure before applying in
+It is necessary to wait for each step to complete and to check the result. It
+also is necessary to test and document the whole procedure before applying in
 production. See also section REQUIREMENTS below and example on checking status
 of HANA and cluster above.
 .\" TODO details
@@ -381,8 +381,8 @@ HANA gets fully stopped.
 This procedure can be used to update HANA, OS or hardware. HANA roles and
 resource status remains unchanged.

-It is neccessary to wait for each step to complete and to check the result.
-It also is neccessary to test and document the whole procedure before applying in production.
+It is necessary to wait for each step to complete and to check the result.
+It also is necessary to test and document the whole procedure before applying in production.
 .PP
 .RS 2
 1. disabling pacemaker on HANA primary
diff --git a/man/SAPHanaSR_upgrade_to_angi.7 b/man/SAPHanaSR_upgrade_to_angi.7
index 230bcf17..237c73ea 100644
--- a/man/SAPHanaSR_upgrade_to_angi.7
+++ b/man/SAPHanaSR_upgrade_to_angi.7
@@ -18,7 +18,7 @@ installation from scratch.
 .PP
 The upgrade procedure depends on an initial setup as described in setup guides
 and manual pages. See REQUIREMENTS below and in manual pages SAPHanaSR(7) or
-SAPHanaSR-ScaleOut(7). The procedure does not neccesarily need downtime for
+SAPHanaSR-ScaleOut(7). The procedure does not necessarily need downtime for
 HANA, if planned and executed carefully. Nevertheless, it should be done under
 friendly conditions.
 .PP
diff --git a/man/ocf_suse_SAPHana.7 b/man/ocf_suse_SAPHana.7
index 7fff7639..bb003b63 100644
--- a/man/ocf_suse_SAPHana.7
+++ b/man/ocf_suse_SAPHana.7
@@ -293,7 +293,7 @@ system replication (e.g. PRD), managed by the SAPHana RA. The second SAPHana
 is a single test HANA (e.g. TST) running together with the productive HANA
 secondary on the same node. This second -single- HANA is managed as a primitive
 resource by the SAPInstance RA. Of course, a SAPHanaTopology clone resource is
-needed to make this work. It is also neccessary to prepare an HANA HA/DR hook
+needed to make this work. It is also necessary to prepare an HANA HA/DR hook
 script for adjusting the secondary HANA's memory in case of sr_takeover. See
 manual page susCostOpt.py(7) and URLs below. Finally, the SAPHana primary gets
 a priority to allow priority fencing. See manual page
@@ -379,7 +379,7 @@ Note: Older versions of the Linux cluster have used the commands 'migrate' and
 .PP
 * Manually start the HANA primary if only one node is available.

-This might be neccessary in case the cluster could not detect the status of both nodes.
+This might be necessary in case the cluster could not detect the status of both nodes.
 .PP
 .RS 2
 1. Start the cluster.
diff --git a/man/ocf_suse_SAPHanaController.7 b/man/ocf_suse_SAPHanaController.7
index 4d4d6e87..9b577deb 100644
--- a/man/ocf_suse_SAPHanaController.7
+++ b/man/ocf_suse_SAPHanaController.7
@@ -31,7 +31,7 @@ SAPHanaController and SAPHanaTopology RAs, the SAPHanaSR solution uses an
 "HA/DR provider" API provided by HANA to get informed about the current state
 of the system replication.
 .PP
-On initial cluster start, the cluster needs to detect a valid HANA system replication setup, including system replication status (SOK) and last primary timestamp (LPT). This is neccessary to ensure data integrity.
+On initial cluster start, the cluster needs to detect a valid HANA system replication setup, including system replication status (SOK) and last primary timestamp (LPT). This is necessary to ensure data integrity.
 .PP
 The SAPHanaController RA performs the actual check of the SAP HANA database
 instances and is configured as promotable clone resource.
diff --git a/man/susCostOpt.py.7 b/man/susCostOpt.py.7
index 439c5e0f..4949bdd4 100644
--- a/man/susCostOpt.py.7
+++ b/man/susCostOpt.py.7
@@ -150,10 +150,10 @@ Please refer to SAP documentation for details on HANA commands.
 * Overview on recovery procedure for reverting to normal operation after takeover.
 .PP
 On postTakeover() the hook script changes configuration in memory and in
-persistence. It is neccessary to recover the initial settings on secondary site
+persistence. It is necessary to recover the initial settings on secondary site
 (step 7) before the fully operational state can be re-established (steps 8-11).
 Further the HANA cluster resource default is AUTOMATED_REGISTER=false. This also
-makes administrative interaction neccessary (steps 1-4).
+makes administrative interaction necessary (steps 1-4).
 If AUTOMATED_REGISTER=true is set, the Linux cluster will do that automatically.
 See manual page ocf_suse_SAPHanaController(7) for details on cluster resource
 settings. Use exact same site names as known to the Linux cluster. See manual page

From 11a3ccc2a8bba93e03f3c77966568472716c5415 Mon Sep 17 00:00:00 2001
From: lpinne
Date: Wed, 5 Jun 2024 15:08:59 +0200
Subject: [PATCH 07/84] SAPHanaSR-upgrade-to-angi-demo: fixed function end
 messages

---
 tools/SAPHanaSR-upgrade-to-angi-demo | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tools/SAPHanaSR-upgrade-to-angi-demo b/tools/SAPHanaSR-upgrade-to-angi-demo
index 3d150c3a..55e0cd2b 100755
--- a/tools/SAPHanaSR-upgrade-to-angi-demo
+++ b/tools/SAPHanaSR-upgrade-to-angi-demo
@@ -12,7 +12,7 @@
 #
 # define parameters and functions
 #
-VERSION="2024-05-06 0.3"
+VERSION="2024-06-05 0.3a"
 DRYRUN=yes
 # TODO DRYRUN=no
 EXE=$(basename $0)
@@ -208,7 +208,7 @@ function f_maintenance-on-classic() {
     echo "echo \"property cib-bootstrap-options: stop-orphan-resources=false\" | crm configure load update -"
     [ $DRYRUN = no ] && echo "property cib-bootstrap-options: stop-orphan-resources=false" |\
     crm configure load update -
-    echo-funa run "${FUNCNAME[0]}"
+    echo-funa end "${FUNCNAME[0]}"
 }

 function f_maintenance-off-angi() {
@@ -235,7 +235,7 @@ function f_maintenance-off-angi() {
     echo "echo \"property cib-bootstrap-options: stop-orphan-resources=true\" | crm configure load update -"
     [ $DRYRUN = no ] && echo "property cib-bootstrap-options: stop-orphan-resources=true" |\
     crm configure load update -
-    echo-funa run "${FUNCNAME[0]}"
+    echo-funa end "${FUNCNAME[0]}"
 }

 function del-srhook-local-classic() {

From 86cf6cde611f94e2893eecbbaea5f55133f704b1 Mon Sep 17 00:00:00 2001
From: Fabian Herschel
Date: Tue, 11 Jun 2024 17:43:47 +0200
Subject: [PATCH 08/84] angi: adding alert handler directory and first alert
 agent

---
 alert/SAPHanaSR-alert | 54 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 54 insertions(+)
 create mode 100755 alert/SAPHanaSR-alert

diff --git a/alert/SAPHanaSR-alert b/alert/SAPHanaSR-alert
new file mode 100755
index 00000000..0f28f229
--- /dev/null
+++ b/alert/SAPHanaSR-alert
@@ -0,0 +1,54 @@
+#!/bin/bash
+#
+# SAPHanaSR-alert
+# Author: Lars Pinne Fabian Herschel, June 2024
+# Support: linux@sap.com
+# License: GNU General Public License (GPL)
+# Copyright: (c) 2024 SUSE LLC
+# 2024-06-11-17:40
+#
+# crm configure alert nodes-1 "/usr/bin/SAPHanaSR-alert" select nodes
+# crm configure alert fencing-1 "/usr/bin/SAPHanaSR-alert" select fencing
+#
+
+/usr/bin/logger "SAPHanaSR-alert: init"
+CRM_alert_recipient=/dev/null
+cache_file="/run/crm/SAPHanaSR_site_cache"
+
+function process_fencing()
+{
+    # SAPHanaSR_site_cache has format (each line) host:site_name
+    # figure out fenced site
+
+    if [[ -e "$cache_file" ]]; then
+        fenced_site_name=$(awk -F: '$1 == host { print $2 }' host="${CRM_alert_node}" "$cache_file")
+        local_site_name=$(awk -F: '$1 == host { print $2 }' host="${HOSTNAME}" "$cache_file")
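        # [editorial annotation, not part of the commit] The cache file format
        # is stated above as one "host:site_name" pair per line, for example
        # (host and site names illustrative):
        #   suse01:SITE_A
        #   suse02:SITE_B
        # With that input, the awk call prints the site column of the line
        # whose first field equals the given host, e.g. SITE_A for suse01.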
+        if [[ "$local_site_name" != "" && "$fenced_site_name" == "$local_site_name" ]]; then
+            /usr/bin/logger "SAPHanaSR-alert: Fence-Decision: FENCE ($fenced_site_name == $local_site_name)"
+            /usr/sbin/crm --force node fence "${HOSTNAME}"; rc="$?"
+            if [[ "$rc" != "0" ]]; then
+                /usr/bin/logger "SAPHanaSR-alert: /usr/sbin/crm --force node fence ${HOSTNAME}; rc=$rc"
+            fi
+        else
+            /usr/bin/logger "SAPHanaSR-alert: Fence-Decision: NO FENCE ($fenced_site_name != $local_site_name)"
+        fi
+    else
+        /usr/bin/logger "SAPHanaSR-alert: Fence-Decision: NO FENCE (no cache)"
+    fi
+}
+
+
+case "$CRM_alert_kind" in
+    node|nodes)
+        msg="Node '${CRM_alert_node}' is now '${CRM_alert_desc}'"
+        ;;
+    fencing)
+        msg="Fencing for '${CRM_alert_node}': ${CRM_alert_desc}"
+        process_fencing
+        ;;
+    *)
+        msg="Unhandled $CRM_alert_kind alert (${CRM_alert_desc})"
+        ;;
+esac
+/usr/bin/logger "SAPHanaSR-alert: $msg"
+#

From f29cc68760a2bd23cbe6b52f6821dd9a70e09713 Mon Sep 17 00:00:00 2001
From: Fabian Herschel
Date: Tue, 11 Jun 2024 17:48:39 +0200
Subject: [PATCH 09/84] angi: saphana-controller-lib - document initial return
 code

---
 ra/saphana-controller-lib | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ra/saphana-controller-lib b/ra/saphana-controller-lib
index 235e7afe..15a5af48 100755
--- a/ra/saphana-controller-lib
+++ b/ra/saphana-controller-lib
@@ -1736,7 +1736,7 @@ function saphana_monitor_secondary() {
                 lpa_set_lpt 10 "$gSite"
                 rc="$OCF_ERR_GENERIC"
                 ;;
-            1 ) # ERROR
+            1 ) # ERROR (rc is set to OCF_NOT_RUNNING by init (see local definition))
                 super_ocf_log debug "DBG: 012 * lpa_set_lpt 10 $gSite"
                 lpa_set_lpt 10 "$gSite"
                 ;;

From 3122db4566735214adf168ecc451c05054b8439d Mon Sep 17 00:00:00 2001
From: Fabian Herschel
Date: Tue, 11 Jun 2024 17:55:20 +0200
Subject: [PATCH 10/84] angi: tools/SAPHanaSR-showAttr - updated copyright
 lines

---
 tools/SAPHanaSR-showAttr | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/SAPHanaSR-showAttr b/tools/SAPHanaSR-showAttr
index 326510ff..793ff7e1 100755
--- a/tools/SAPHanaSR-showAttr
+++ b/tools/SAPHanaSR-showAttr
@@ -5,7 +5,7 @@ saphana_sr_tools.py

     Author: Fabian Herschel, May 2023
     License: GNU General Public License (GPL)
-    Copyright: (c) 2023 SUSE LLC
+    Copyright: (c) 2023,2024 SUSE LLC

 # TODO: STEP02: Think also about multi SID implementation - maybe by using multiple HanaCluster objects (one per SID)
 """

From 920d0112561eae9cc578138e24e36705e68faa46 Mon Sep 17 00:00:00 2001
From: lpinne
Date: Wed, 12 Jun 2024 08:44:40 +0200
Subject: [PATCH 11/84] SAPHanaSR-alert.8: initial checkin

---
 man/SAPHanaSR-alert.8 | 107 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 107 insertions(+)
 create mode 100644 man/SAPHanaSR-alert.8

diff --git a/man/SAPHanaSR-alert.8 b/man/SAPHanaSR-alert.8
new file mode 100644
index 00000000..3669e3cb
--- /dev/null
+++ b/man/SAPHanaSR-alert.8
@@ -0,0 +1,107 @@
+.\" Version: 1.001
+.\"
+.TH SAPHanaSR-alert 7 "12 Jun 2024" "" "SAPHanaSR"
+.\"
+.SH NAME
+SAPHanaSR-alert \- Alert agent for cluster fencing alerts.
+.PP
+.\"
+.SH DESCRIPTION
+SAPHanaSR-alert can be used to react on Linux cluster fencing alerts.
+
+The Linux cluster provides TODO
+
+improves three use cases for HANA scale-out TODO
+.br
+- HA/DR provider hook script susChkSrv.py action_on_lost=fence
+.br
+- resource agent SAPHanaController ON_FAIL_ACTION=fence
+.br
+- resource agent SAPHanaFilesystem ON_FAIL_ACTION=fence
+.PP
+
+This alert agent will run on all Linux cluster nodes.
+.PP
+.\"
+.SH SUPPORTED PARAMETERS
+.PP
+.\"
+.PP
+.\"
+.SH RETURN CODES
+.B 0
+Successful program execution.
+.br
+.B >0
+Usage, syntax or execution errors.
+.PP
+.\"
+.SH EXAMPLES
+.PP
+\fB*\fR Example for
+.PP
+TODO
+.PP
+alert fencing-1 "/usr/bin/SAPHanaSR-alert" select fencing
+.PP
+\fB*\fR Example for configuring the alert agent per crm.
+.PP
+Alternate way for configuring the aler agent.
+.PP
+# crm configure alert fencing-1 "/usr/bin/SAPHanaSR-alert" select fencing
+.PP
+\fB*\fR Example for showing all configured alert agents.
+.PP
+# crm configure show | grep -A1 alert
+.PP
+\fB*\fR Example for showing
+.PP
+# grep SAPHanaSR-alert /var/log/messages
+.PP
+.\"
+.SH FILES
+.TP
+/usr/bin/SAPHanaSR-alert
+the alert agent
+.TP
+/run/crm/SAPHanaSR_site_cache
+the internal cache for host TODO site
+.PP
+.\"
+.SH REQUIREMENTS
+1. Pacemaker 2.1 TODO or newer.
+.PP
+2. HANA scale-out performance-optimized scenario.
+.PP
+3. Only one SID controlled by the Linux cluster.
+.PP
+4. No other alert agent should be configured for the fencing alert.
+.PP
+5. The alert agent runtime almost completely depends on call-outs to OS and
+Linux cluster.
+.\"
+.SH BUGS
+In case of any problem, please use your favourite SAP support process to open
+a request for the component BC-OP-LNX-SUSE.
+Please report any other feedback and suggestions to feedback@suse.com.
+.PP
+.\"
+.SH SEE ALSO
+\fBSAPHanaSR-angi\fP(7) ,
+\fBocf_suse_SAPHanaController\fP(7) , \fBocf_suse_SAPHanaFilesystem\fP(7) ,
+\fBsusChkSrv.py\fP (7) , \fBcrm\fP(8)
+.PP
+.\"
+.SH AUTHORS
+F.Herschel, L.Pinne.
+.PP
+.\"
+.SH COPYRIGHT
+.br
+(c) 2024 SUSE LLC
+.br
+SAPHanaSR-alert comes with ABSOLUTELY NO WARRANTY.
+.br
+For details see the GNU General Public License at
+http://www.gnu.org/licenses/gpl.html
+.\"

From b59717942e442082b5045bc3b8018dc9be1ea8bc Mon Sep 17 00:00:00 2001
From: lpinne
Date: Wed, 12 Jun 2024 08:49:07 +0200
Subject: [PATCH 12/84] SAPHanaSR-alert.8: initial checkin

---
 man/SAPHanaSR-alert.8 | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/man/SAPHanaSR-alert.8 b/man/SAPHanaSR-alert.8
index 3669e3cb..106c19aa 100644
--- a/man/SAPHanaSR-alert.8
+++ b/man/SAPHanaSR-alert.8
@@ -34,11 +34,14 @@ Successful program execution.
 .br
 .B >0
 Usage, syntax or execution errors.
+.br
+In addition log entries are written, which can be scanned by using a pattern
+like "SAPHanaSR-alert".
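.\" [editorial annotation, not part of the commit] Assuming the agent tags its
.\" messages via 'logger -t SAPHanaSR-alert' (as later commits in this series
.\" introduce), the same entries can also be read from the journal with:
.\"   # journalctl -t SAPHanaSR-alert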
 .PP
 .\"
 .SH EXAMPLES
 .PP
-\fB*\fR Example for
+\fB*\fR Example configuration for the fencing alert handler.
 .PP
 TODO
 .PP

From 89d6fa626e71639cc14c10d54a876ce0fd8c3e38 Mon Sep 17 00:00:00 2001
From: lpinne
Date: Wed, 12 Jun 2024 08:51:24 +0200
Subject: [PATCH 13/84] SAPHanaSR-alert.8: initial checkin

---
 man/SAPHanaSR-alert.8 | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/man/SAPHanaSR-alert.8 b/man/SAPHanaSR-alert.8
index 106c19aa..5c91722e 100644
--- a/man/SAPHanaSR-alert.8
+++ b/man/SAPHanaSR-alert.8
@@ -45,21 +45,29 @@ like "SAPHanaSR-alert".
 .PP
 TODO
 .PP
+.RS 2
 alert fencing-1 "/usr/bin/SAPHanaSR-alert" select fencing
+.RE
 .PP
 \fB*\fR Example for configuring the alert agent per crm.
 .PP
 Alternate way for configuring the aler agent.
 .PP
+.RS 2
 # crm configure alert fencing-1 "/usr/bin/SAPHanaSR-alert" select fencing
+.RE
 .PP
 \fB*\fR Example for showing all configured alert agents.
 .PP
+.RS 2
 # crm configure show | grep -A1 alert
+.RE
 .PP
 \fB*\fR Example for showing
 .PP
+.RS 2
 # grep SAPHanaSR-alert /var/log/messages
+.RE
 .PP
 .\"
 .SH FILES

From 9b55edabd2348e5c92ffde870b0e0f0cb3f62818 Mon Sep 17 00:00:00 2001
From: lpinne
Date: Wed, 12 Jun 2024 08:58:48 +0200
Subject: [PATCH 14/84] SAPHanaSR-alert.8: initial checkin

---
 man/SAPHanaSR-alert.8 | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/man/SAPHanaSR-alert.8 b/man/SAPHanaSR-alert.8
index 5c91722e..bfa9a19c 100644
--- a/man/SAPHanaSR-alert.8
+++ b/man/SAPHanaSR-alert.8
@@ -19,12 +19,14 @@ improves three use cases for HANA scale-out TODO
 .br
 - resource agent SAPHanaFilesystem ON_FAIL_ACTION=fence
 .PP
-
+See also manual pages ocf_suse_SAPHanaController(7), ocf_suse_SAPHanaFilesystem(7)
+and susChkSrv.py(7).
+.PP
 This alert agent will run on all Linux cluster nodes.
 .PP
 .\"
-.SH SUPPORTED PARAMETERS
-.PP
+.\" .SH SUPPORTED PARAMETERS
+.\" .PP
 .\"
 .PP
 .\"
@@ -43,15 +45,15 @@ like "SAPHanaSR-alert".
 .PP
 \fB*\fR Example configuration for the fencing alert handler.
 .PP
-TODO
+The following line needs to be added to the cluster´s CIB:
 .PP
 .RS 2
 alert fencing-1 "/usr/bin/SAPHanaSR-alert" select fencing
 .RE
 .PP
-\fB*\fR Example for configuring the alert agent per crm.
+\fB*\fR Example for configuring the alert agent by using crm.
 .PP
-Alternate way for configuring the aler agent.
+Alternate way for configuring the alert agent.
 .PP
 .RS 2
 # crm configure alert fencing-1 "/usr/bin/SAPHanaSR-alert" select fencing

From 77307fb6980406c84c6575fd47de83578e9fd5bd Mon Sep 17 00:00:00 2001
From: lpinne
Date: Wed, 12 Jun 2024 09:13:25 +0200
Subject: [PATCH 15/84] SAPHanaSR-alert.8: details, todos

---
 man/SAPHanaSR-alert.8 | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/man/SAPHanaSR-alert.8 b/man/SAPHanaSR-alert.8
index bfa9a19c..cdb70996 100644
--- a/man/SAPHanaSR-alert.8
+++ b/man/SAPHanaSR-alert.8
@@ -11,18 +11,22 @@ SAPHanaSR-alert can be used to react on Linux cluster fencing alerts.

 The Linux cluster provides TODO

-improves three use cases for HANA scale-out TODO
+SAPHanaSR-alert is used to fence all nodes of an HANA site, in case one of
+the gets fenced. This improves three use cases for HANA scale-out:
 .br
 - HA/DR provider hook script susChkSrv.py action_on_lost=fence
 .br
 - resource agent SAPHanaController ON_FAIL_ACTION=fence
 .br
 - resource agent SAPHanaFilesystem ON_FAIL_ACTION=fence
-.PP
+.br
 See also manual pages ocf_suse_SAPHanaController(7), ocf_suse_SAPHanaFilesystem(7)
 and susChkSrv.py(7).
 .PP
-This alert agent will run on all Linux cluster nodes.
+When the Linux cluster has performed a node fencing, it calls SAPHanaSR-alert
+on each active cluster node. The agent checks whether the local node belongs to
+the same HANA site as the fenced node. If so, it asks the cluster to fence the
+local node as well.
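.\" [editorial annotation, not part of the commit] The decision described
.\" above, sketched from the agent script:
.\"   fenced_site := site of the fenced node, local_site := site of this node,
.\"   both looked up in /run/crm/SAPHanaSR_site_cache;
.\"   fence the local node only if local_site is non-empty and equals
.\"   fenced_site; otherwise, or without a cache file, do nothing.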
 .PP
 .\"
@@ -90,7 +94,11 @@ Alternate way for configuring the alert agent.
 .PP
 4. No other alert agent should be configured for the fencing alert.
 .PP
-5. The alert agent runtime almost completely depends on call-outs to OS and
+5. Automatic start of the Linux cluster at boot should be switched off.
+At least automatic re-start on just fenced nodes should be disabled. See
+manual page sbd(8) for details.
+.PP
+6. The alert agent runtime almost completely depends on call-outs to OS and
 Linux cluster.

From 085657ec3851be4dfe0aa63b4a016881dad36f Mon Sep 17 00:00:00 2001
From: lpinne
Date: Wed, 12 Jun 2024 09:15:08 +0200
Subject: [PATCH 16/84] SAPHanaSR-alert.8: typos

---
 man/SAPHanaSR-alert.8 | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/man/SAPHanaSR-alert.8 b/man/SAPHanaSR-alert.8
index cdb70996..67a8544f 100644
--- a/man/SAPHanaSR-alert.8
+++ b/man/SAPHanaSR-alert.8
@@ -12,7 +12,7 @@ SAPHanaSR-alert can be used to react on Linux cluster fencing alerts.
 The Linux cluster provides TODO

 SAPHanaSR-alert is used to fence all nodes of an HANA site, in case one of
-the gets fenced. This improves three use cases for HANA scale-out:
+them gets fenced. This improves three use cases for HANA scale-out:
 .br
 - HA/DR provider hook script susChkSrv.py action_on_lost=fence
 .br

From 852ee33d77dd2937c090683b7a428413c47ddcde Mon Sep 17 00:00:00 2001
From: lpinne
Date: Wed, 12 Jun 2024 10:04:39 +0200
Subject: [PATCH 17/84] SAPHanaSR-alert.8: details, todos

---
 man/SAPHanaSR-alert.8 | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/man/SAPHanaSR-alert.8 b/man/SAPHanaSR-alert.8
index 67a8544f..55ee5556 100644
--- a/man/SAPHanaSR-alert.8
+++ b/man/SAPHanaSR-alert.8
@@ -1,6 +1,7 @@
 .\" Version: 1.001
 .\"
 .TH SAPHanaSR-alert 7 "12 Jun 2024" "" "SAPHanaSR"
+.\" TODO SAPHanaSR-alert-fencing ?
 .\"
 .SH NAME
 SAPHanaSR-alert \- Alert agent for cluster fencing alerts.
@@ -10,7 +11,7 @@ SAPHanaSR-alert \- Alert agent for cluster fencing alerts.
 .SH DESCRIPTION
 SAPHanaSR-alert can be used to react on Linux cluster fencing alerts.

-The Linux cluster provides TODO
+The Linux cluster provides TODO

 SAPHanaSR-alert is used to fence all nodes of an HANA site, in case one of
 them gets fenced. This improves three use cases for HANA scale-out:
@@ -72,7 +73,7 @@ Alternate way for configuring the alert agent.
 # crm configure show | grep -A1 alert
 .RE
 .PP
-\fB*\fR Example for showing
+\fB*\fR Example for showing agent messages.
 .PP
 .RS 2
 # grep SAPHanaSR-alert /var/log/messages
@@ -85,21 +86,20 @@ Alternate way for configuring the alert agent.
 /usr/bin/SAPHanaSR-alert
 the alert agent
 .TP
 /run/crm/SAPHanaSR_site_cache
-the internal cache for host TODO site
+the internal cache for host to site relation - do not touch this file
 .PP
 .\"
 .SH REQUIREMENTS
-1. Pacemaker 2.1 TODO or newer.
+1. Pacemaker 2.1 or newer.
 .PP
-2. HANA scale-out performance-optimized scenario.
+2. SAP HANA scale-out performance-optimized scenario.
 .PP
 3. Only one SID controlled by the Linux cluster.
 .PP
 4. No other alert agent should be configured for the fencing alert.
 .PP
-5. Automatic start of the Linux cluster at boot should be switched off.
-At least automatic re-start on just fenced nodes should be disabled. See
-manual page sbd(8) for details.
+5. Automatic re-start on just fenced nodes should be disabled. See manual page
+sbd(8) for details.
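.\" [editorial annotation, not part of the commit] Disabling automatic re-start
.\" of just fenced nodes is typically done with SBD_STARTMODE=clean in
.\" /etc/sysconfig/sbd: a node whose SBD slot still holds a fencing message
.\" then stays down until the slot is cleared, e.g. with the
.\" cs_clear_sbd_devices command shown in PATCH 01. See sbd(8).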
 .PP
-6. The alert agent runtime almost completely depends on call-outs to OS and
+6. The alert agent runtime almost completely depends on call-outs to OS and
 Linux cluster.
 .\"
 .SH BUGS
 In case of any problem, please use your favourite SAP support process to open
 a request for the component BC-OP-LNX-SUSE.
 Please report any other feedback and suggestions to feedback@suse.com.
 .PP
 .\"
 .SH SEE ALSO
-\fBSAPHanaSR-angi\fP(7) ,
+\fBSAPHanaSR-angi\fP(7) , \fBSAPHanaSR-ScaleOut\fP(7) ,
 \fBocf_suse_SAPHanaController\fP(7) , \fBocf_suse_SAPHanaFilesystem\fP(7) ,
 \fBsusChkSrv.py\fP (7) , \fBcrm\fP(8)
 .PP
 .\"
 .SH AUTHORS

From 4094362be53d47627b641b8b6cbec84e54139353 Mon Sep 17 00:00:00 2001
From: lpinne
Date: Wed, 12 Jun 2024 10:22:34 +0200
Subject: [PATCH 18/84] SAPHanaSR-alert.8: details

---
 man/SAPHanaSR-alert.8 | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/man/SAPHanaSR-alert.8 b/man/SAPHanaSR-alert.8
index 55ee5556..72693e28 100644
--- a/man/SAPHanaSR-alert.8
+++ b/man/SAPHanaSR-alert.8
@@ -30,10 +30,11 @@ the same HANA site as the fenced node. If so, it asks the cluster to fence the
 local node as well.
 .PP
 .\"
-.\" .SH SUPPORTED PARAMETERS
-.\" .PP
-.\" Alert meta option
-.\" TODO timeout default 30s "meta timeout=30s"
+.SH SUPPORTED PARAMETERS
+.TP
+\fBtimeout\fR
+.br
+If the alert agent does not complete within this amount of time, it will be terminated. Optional, default 30s. Example "meta timeout=30s".
 .\"
 .PP
 .\"

From 31f130c04a04fa789aa0926695850ee05f98080db Mon Sep 17 00:00:00 2001
From: lpinne
Date: Wed, 12 Jun 2024 10:33:55 +0200
Subject: [PATCH 19/84] SAPHanaSR-alert.8: details, todos

---
 man/SAPHanaSR-alert.8 | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/man/SAPHanaSR-alert.8 b/man/SAPHanaSR-alert.8
index 72693e28..2e04d11f 100644
--- a/man/SAPHanaSR-alert.8
+++ b/man/SAPHanaSR-alert.8
@@ -10,7 +10,9 @@ SAPHanaSR-alert \- Alert agent for cluster fencing alerts.
 .SH DESCRIPTION
 SAPHanaSR-alert can be used to react on Linux cluster fencing alerts.

-The Linux cluster provides TODO
+The Linux cluster provides an interface to take external action when a cluster
+event occurs. The cluster calls an external program (an alert agent) to handle
+alerts.

 SAPHanaSR-alert is used to fence all nodes of an HANA site, in case one of
 them gets fenced. This improves three use cases for HANA scale-out:

From 5f845b298e42b2a5874b53ec36e23bcf5efc85bd Mon Sep 17 00:00:00 2001
From: lpinne
Date: Wed, 12 Jun 2024 10:41:58 +0200
Subject: [PATCH 20/84] SAPHanaSR-alert.8: details, todos

---
 man/SAPHanaSR-alert.8 | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/man/SAPHanaSR-alert.8 b/man/SAPHanaSR-alert.8
index 2e04d11f..5862e031 100644
--- a/man/SAPHanaSR-alert.8
+++ b/man/SAPHanaSR-alert.8
@@ -35,8 +35,10 @@ local node as well.
 .SH SUPPORTED PARAMETERS
 .TP
 \fBtimeout\fR
-.br
-If the alert agent does not complete within this amount of time, it will be terminated. Optional, default 30s. Example "meta timeout=30s".
+If the alert agent does not complete within this amount of time, it will be terminated. Optional, default "30s". Example "meta timeout=30s".
+.TP
+\fBenabled\fR
+If false for an alert, the alert will not be used. If true for an alert and false for a particular recipient of that alert, that recipient will not be used. Optional, default "true".
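.\" [editorial annotation, not part of the commit] An untested sketch of how
.\" both meta options documented above might be combined in one alert
.\" definition (values illustrative, exact ordering per crm(8)):
.\"   # crm configure alert fencing-1 "/usr/bin/SAPHanaSR-alert" \
.\"       meta timeout=60s enabled=true select fencing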
@@ -92,7 +94,7 @@ the internal cache for host to site relation - do not touch this file
 .PP
 .SH REQUIREMENTS
-1. Pacemaker 2.1 or newer.
+1. Pacemaker 2.1.6 or newer.
 .PP
 2. SAP HANA scale-out performance-optimized scenario.
 .PP
@@ -115,7 +117,9 @@ Please report any other feedback and suggestions to feedback@suse.com.
 .SH SEE ALSO
 \fBSAPHanaSR-angi\fP(7) , \fBSAPHanaSR-ScaleOut\fP(7) ,
 \fBocf_suse_SAPHanaController\fP(7) , \fBocf_suse_SAPHanaFilesystem\fP(7) ,
-\fBsusChkSrv.py\fP (7) , \fBcrm\fP(8)
+\fBsusChkSrv.py\fP (7) , \fBcrm\fP(8) ,
+.br
+https://clusterlabs.org/pacemaker/doc/2.1/Pacemaker_Administration/singlehtml/#alert-agents
 .PP
 .\"
 .SH AUTHORS

From ae0cdd573e48612732c78c9ff5f95c7e0099d00e Mon Sep 17 00:00:00 2001
From: lpinne
Date: Wed, 12 Jun 2024 10:49:07 +0200
Subject: [PATCH 21/84] SAPHanaSR-alert.8: details

---
 man/SAPHanaSR-alert.8 | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/man/SAPHanaSR-alert.8 b/man/SAPHanaSR-alert.8
index 5862e031..480f481a 100644
--- a/man/SAPHanaSR-alert.8
+++ b/man/SAPHanaSR-alert.8
@@ -9,11 +9,16 @@ SAPHanaSR-alert \- Alert agent for cluster fencing alerts.
 .SH DESCRIPTION
 SAPHanaSR-alert can be used to react on Linux cluster fencing alerts.
-
+.PP
 The Linux cluster provides an interface to take external action when a cluster
-event occurs. The cluster calls an external program (an alert agent) to handle
-alerts.
-
+event occurs (alert). Then the cluster calls an external program (an alert agent)
+to handle that alert.
+.PP
+When the Linux cluster has performed a node fencing, it can call SAPHanaSR-alert
+on each active cluster node. The agent checks whether the local node belongs to
+the same HANA site as the fenced node. If so, it asks the cluster to fence the
+local node as well.
+.PP
 SAPHanaSR-alert is used to fence all nodes of an HANA site, in case one of
 them gets fenced. This improves three use cases for HANA scale-out:
 .br
@@ -26,11 +31,6 @@ them gets fenced. This improves three use cases for HANA scale-out:
 See also manual pages ocf_suse_SAPHanaController(7), ocf_suse_SAPHanaFilesystem(7)
 and susChkSrv.py(7).
 .PP
-When the Linux cluster has performed a node fencing, it calls SAPHanaSR-alert
-on each active cluster node. The agent checks whether the local node belongs to
-the same HANA site as the fenced node. If so, it asks the cluster to fence the
-local node as well.
-.PP
 .\"
 .SH SUPPORTED PARAMETERS

From 2162c0af0ad92b8cbe6b52f6821dd9a70e09713d Mon Sep 17 00:00:00 2001
From: Fabian Herschel
Date: Wed, 12 Jun 2024 13:15:45 +0200
Subject: [PATCH 22/84] angi: SAPHanaSR-alert - improved log message format and
 shellcheck compatibility

---
 alert/SAPHanaSR-alert | 44 +++++++++++++++++++++++--------------------
 1 file changed, 26 insertions(+), 18 deletions(-)

diff --git a/alert/SAPHanaSR-alert b/alert/SAPHanaSR-alert
index 0f28f229..0879bc72 100755
--- a/alert/SAPHanaSR-alert
+++ b/alert/SAPHanaSR-alert
@@ -5,14 +5,20 @@
 # Support: linux@sap.com
 # License: GNU General Public License (GPL)
 # Copyright: (c) 2024 SUSE LLC
-# 2024-06-11-17:40
+# 2024-06-12-13:14
 #
 # crm configure alert nodes-1 "/usr/bin/SAPHanaSR-alert" select nodes
 # crm configure alert fencing-1 "/usr/bin/SAPHanaSR-alert" select fencing
 #

-/usr/bin/logger "SAPHanaSR-alert: init"
-CRM_alert_recipient=/dev/null
+logger_tag="SAPHanaSR-alert"
+
+# ON_FAIL_ACTION="${OCF_RESKEY_ON_FAIL_ACTION:-proceed}"
+CRM_alert_recipient="${CRM_alert_recipient:-/dev/null}"
+crm_alert_kind="${CRM_alert_kind:-manual call}"
+crm_alert_node="${CRM_alert_node:-$HOSTNAME}"
+crm_alert_desc="${CRM_alert_desc:-no description provided}"
+/usr/bin/logger -t "$logger_tag" "AH: begin event '$crm_alert_kind'"
 cache_file="/run/crm/SAPHanaSR_site_cache"

 function process_fencing()
@@ -21,34 +27,36 @@ function process_fencing()
     # SAPHanaSR_site_cache has format (each line) host:site_name
     # figure out fenced site

     if [[ -e "$cache_file" ]]; then
-        fenced_site_name=$(awk -F: '$1 == host { print $2 }' host="${CRM_alert_node}" "$cache_file")
+        fenced_site_name=$(awk -F: '$1 == host { print $2 }' host="${crm_alert_node}" "$cache_file")
         local_site_name=$(awk -F: '$1 == host { print $2 }' host="${HOSTNAME}" "$cache_file")
         if [[ "$local_site_name" != "" && "$fenced_site_name" == "$local_site_name" ]]; then
-            /usr/bin/logger "SAPHanaSR-alert: Fence-Decision: FENCE ($fenced_site_name == $local_site_name)"
+            /usr/bin/logger -t "$logger_tag" "DEC: FENCE ($fenced_site_name == $local_site_name)"
             /usr/sbin/crm --force node fence "${HOSTNAME}"; rc="$?"
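            # [editorial annotation, not part of the commit] 'crm --force node
            # fence <node>' requests fencing of the named node without
            # interactive confirmation; a non-zero rc only means the fencing
            # request itself failed, which is why it is logged below.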
             if [[ "$rc" != "0" ]]; then
-                /usr/bin/logger "SAPHanaSR-alert: /usr/sbin/crm --force node fence ${HOSTNAME}; rc=$rc"
+                /usr/bin/logger -t "$logger_tag" "/usr/sbin/crm --force node fence ${HOSTNAME}; rc=$rc"
             fi
         else
-            /usr/bin/logger "SAPHanaSR-alert: Fence-Decision: NO FENCE ($fenced_site_name != $local_site_name)"
+            /usr/bin/logger -t "$logger_tag" "DEC: NO FENCE ($fenced_site_name != $local_site_name)"
         fi
     else
-        /usr/bin/logger "SAPHanaSR-alert: Fence-Decision: NO FENCE (no cache)"
+        /usr/bin/logger -t "$logger_tag" "DEC: NO FENCE (no cache)"
     fi
 }

-
-case "$CRM_alert_kind" in
+case "$crm_alert_kind" in
     node|nodes)
-        msg="Node '${CRM_alert_node}' is now '${CRM_alert_desc}'"
-        ;;
+        msg="Node '${crm_alert_node}' is now '${crm_alert_desc}'"
+        /usr/bin/logger -t "$logger_tag" "$msg"
+        ;;
     fencing)
-        msg="Fencing for '${CRM_alert_node}': ${CRM_alert_desc}"
-        process_fencing
-        ;;
+        msg="Fencing for '${crm_alert_node}': ${crm_alert_desc}"
+        /usr/bin/logger -t "$logger_tag" "$msg"
+        process_fencing
+        ;;
     *)
-        msg="Unhandled $CRM_alert_kind alert (${CRM_alert_desc})"
-        ;;
+        msg="Unhandled '$crm_alert_kind' alert (${crm_alert_desc})"
+        /usr/bin/logger -t "$logger_tag" "$msg"
+        ;;
 esac
-/usr/bin/logger "SAPHanaSR-alert: $msg"
+/usr/bin/logger -t "$logger_tag" "AH: end event '$crm_alert_kind'"
 #

From 960075ff2b2ddc6436d4566f01f3c97593d6ebc8 Mon Sep 17 00:00:00 2001
From: Fabian Herschel
Date: Wed, 12 Jun 2024 13:50:59 +0200
Subject: [PATCH 23/84] angi: adding alert handler to the package

---
 Makefile              |  1 +
 SAPHanaSR-angi.spec   |  6 +++++-
 alert/SAPHanaSR-alert | 10 +++++-----
 3 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/Makefile b/Makefile
index ed738906..b9903d6a 100644
--- a/Makefile
+++ b/Makefile
@@ -5,6 +5,7 @@
 FILE_LIST = LICENSE \
 	README.md \
+	alert \
 	crm_cfg \
 	icons \
 	man \
diff --git a/SAPHanaSR-angi.spec b/SAPHanaSR-angi.spec
index 62a336be..4066aa2d 100644
--- a/SAPHanaSR-angi.spec
+++ b/SAPHanaSR-angi.spec
@@ -21,7 +21,7 @@ License: GPL-2.0
 Group: Productivity/Clustering/HA
 AutoReqProv: on
 Summary: Resource agents to control the HANA database in system replication setup
-Version: 1.2.7
+Version: 1.2.8
 Release: 0
 Url: https://www.suse.com/c/fail-safe-operation-of-sap-hana-suse-extends-its-high-availability-solution/
@@ -96,6 +96,9 @@ install -m 0644 srHook/susCostOpt.py %{buildroot}/usr/share/%{name}/
 install -m 0644 srHook/susChkSrv.py %{buildroot}/usr/share/%{name}/
 install -m 0444 srHook/global.ini_* %{buildroot}/usr/share/%{name}/samples

+# alert manager
+install -m 0755 alert/SAPHanaSR-alert %{buildroot}/usr/bin
+
 # crm config templates
 install -m 0644 crm_cfg/angi-ScaleUp/[0-9]*_* %{buildroot}/usr/share/%{name}/samples/crm_cfg/angi-ScaleUp
@@ -138,6 +141,7 @@ install -m 0444 tools/saphana_sr_tools.py %{buildroot}/usr/lib/%{name}
 /usr/bin/SAPHanaSR-filter-legacy
 /usr/bin/SAPHanaSR-hookHelper
 /usr/bin/SAPHanaSR-manageProvider
+/usr/bin/SAPHanaSR-alert
 %license LICENSE
 %dir %{_docdir}/%{name}
diff --git a/alert/SAPHanaSR-alert b/alert/SAPHanaSR-alert
index 0879bc72..7f101f7d 100755
--- a/alert/SAPHanaSR-alert
+++ b/alert/SAPHanaSR-alert
@@ -5,7 +5,7 @@
 # Support: linux@sap.com
 # License: GNU General Public License (GPL)
 # Copyright: (c) 2024 SUSE LLC
-# 2024-06-12-13:14
+# 2024-06-12-13:40
 #
 # crm configure alert nodes-1 "/usr/bin/SAPHanaSR-alert" select nodes
 # crm configure alert fencing-1 "/usr/bin/SAPHanaSR-alert" select fencing
@@ -33,7 +33,7 @@ function process_fencing()
             /usr/bin/logger -t "$logger_tag" "DEC: FENCE ($fenced_site_name == $local_site_name)"
             /usr/sbin/crm --force node fence "${HOSTNAME}"; rc="$?"
             if [[ "$rc" != "0" ]]; then
-                /usr/bin/logger -t "$logger_tag" "/usr/sbin/crm --force node fence ${HOSTNAME}; rc=$rc"
+                /usr/bin/logger -t "$logger_tag" "ACT: /usr/sbin/crm --force node fence ${HOSTNAME}; rc=$rc"
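                # [editorial annotation, not part of the commit] The prefixes
                # used by this agent classify its syslog lines: 'AH:' handler
                # begin/end, 'INFO:' event details, 'DEC:' fence decision,
                # 'ACT:' executed action. That keeps the agent's log output
                # easy to scan and filter.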
             fi
         else
@@ -46,16 +46,16 @@ function process_fencing()
 case "$crm_alert_kind" in
     node|nodes)
         msg="Node '${crm_alert_node}' is now '${crm_alert_desc}'"
-        /usr/bin/logger -t "$logger_tag" "$msg"
+        /usr/bin/logger -t "$logger_tag" "INFO: $msg"
         ;;
     fencing)
         msg="Fencing for '${crm_alert_node}': ${crm_alert_desc}"
-        /usr/bin/logger -t "$logger_tag" "$msg"
+        /usr/bin/logger -t "$logger_tag" "INFO: $msg"
         process_fencing
         ;;
     *)
         msg="Unhandled '$crm_alert_kind' alert (${crm_alert_desc})"
-        /usr/bin/logger -t "$logger_tag" "$msg"
+        /usr/bin/logger -t "$logger_tag" "INFO: $msg"
         ;;
 esac
 /usr/bin/logger -t "$logger_tag" "AH: end event '$crm_alert_kind'"

From 63d8a2ee974517899c8a2888e115a7e88d55292f Mon Sep 17 00:00:00 2001
From: Fabian Herschel
Date: Wed, 12 Jun 2024 13:57:31 +0200
Subject: [PATCH 24/84] angi: alert/SAPHanaSR-alert - generate cache file

---
 alert/SAPHanaSR-alert | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/alert/SAPHanaSR-alert b/alert/SAPHanaSR-alert
index 7f101f7d..9ee23587 100755
--- a/alert/SAPHanaSR-alert
+++ b/alert/SAPHanaSR-alert
@@ -25,6 +25,11 @@ function process_fencing()
 {
     # SAPHanaSR_site_cache has format (each line) host:site_name
     # figure out fenced site
+
+    if [[ ! -e "$cache_file" ]]; then
+        ## TODO must be not hard-coded (first only test creating the cache file by ower own)
+        SAPHanaSR-showAttr --sid=HA1 --format=tester | grep site= | sed -e 's/Host\///' -e 's/\/site=/:/' -e 's/"//g' > /run/crm/SAPHanaSR_site_cache
+    fi

     if [[ -e "$cache_file" ]]; then
         fenced_site_name=$(awk -F: '$1 == host { print $2 }' host="${crm_alert_node}" "$cache_file")

From 8064416b277e25fda83733893515ae4c94b5c3c0 Mon Sep 17 00:00:00 2001
From: Fabian Herschel
Date: Wed, 12 Jun 2024 14:00:10 +0200
Subject: [PATCH 25/84] angi: alert/SAPHanaSR-alert - independent from SID

---
 alert/SAPHanaSR-alert | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/alert/SAPHanaSR-alert b/alert/SAPHanaSR-alert
index 9ee23587..ffea3009 100755
--- a/alert/SAPHanaSR-alert
+++ b/alert/SAPHanaSR-alert
@@ -27,8 +27,7 @@ function process_fencing()
     # figure out fenced site

     if [[ ! -e "$cache_file" ]]; then
-        ## TODO must be not hard-coded (first only test creating the cache file by ower own)
-        SAPHanaSR-showAttr --sid=HA1 --format=tester | grep site= | sed -e 's/Host\///' -e 's/\/site=/:/' -e 's/"//g' > /run/crm/SAPHanaSR_site_cache
+        SAPHanaSR-showAttr --format=tester | grep site= | sed -e 's/Host\///' -e 's/\/site=/:/' -e 's/"//g' > /run/crm/SAPHanaSR_site_cache
     fi

     if [[ -e "$cache_file" ]]; then

From a00fa2fb6edb7a10a9742ad8abd99f5f3d70b4bf Mon Sep 17 00:00:00 2001
From: Fabian Herschel
Date: Wed, 12 Jun 2024 16:05:26 +0200
Subject: [PATCH 26/84] angi: saphanasr-filesystem (lib+RA) - creating
 site_cache_file

---
 ra/SAPHanaFilesystem      | 2 +-
 ra/saphana-filesystem-lib | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/ra/SAPHanaFilesystem b/ra/SAPHanaFilesystem
index 9ea34e0e..caa2f6a6 100755
--- a/ra/SAPHanaFilesystem
+++ b/ra/SAPHanaFilesystem
@@ -21,7 +21,7 @@
 # OCF_RESKEY_DIR_EXECUTABLE (optional, well known directories will be searched by default)
 #
 #######################################################################
-SAPHanaFilesystemVersion="1.2.7"
+SAPHanaFilesystemVersion="1.2.8"
 #
 # Initialization:
 timeB=$(date '+%s')
diff --git a/ra/saphana-filesystem-lib b/ra/saphana-filesystem-lib
index 48c2fb8d..861a0112 100755
--- a/ra/saphana-filesystem-lib
+++ b/ra/saphana-filesystem-lib
@@ -354,6 +354,8 @@ function shfs_monitor() {
             ;;
         esac
     fi
+
+    ( SAPHanaSR-showAttr --format=tester | grep site= | sed -e 's/Host\///' -e 's/\/site=/:/' -e 's/"//g' > /run/crm/SAPHanaSR_site_cache; chmod 644 /run/crm/SAPHanaSR_site_cache )&
     super_ocf_log info "FLOW ${FUNCNAME[0]} rc=$rc"
     return "$rc"
 } # end function shfs_monitor
if [[ "$rc" != "0" ]]; then /usr/bin/logger -t "$logger_tag" "ACT: /usr/sbin/crm --force node fence ${HOSTNAME}; rc=$rc" @@ -47,6 +46,26 @@ function process_fencing() fi } +function check_fencing() +{ + # SAPHanaSR_site_cache has format (each line) host:site_name + # figure out fenced site + + if [[ -e "$cache_file" ]]; then + fenced_site_name=$(awk -F: '$1 == host { print $2 }' host="${crm_alert_node}" "$cache_file") + local_site_name=$(awk -F: '$1 == host { print $2 }' host="${HOSTNAME}" "$cache_file") + /usr/bin/logger -t "$logger_tag" "INFO: cache_file=$cache_file" + /usr/bin/logger -t "$logger_tag" "INFO: fenced_site_name=$fenced_site_name, local_site_name=$local_site_name" + if [[ "$local_site_name" != "" && "$fenced_site_name" == "$local_site_name" ]]; then + /usr/bin/logger -t "$logger_tag" "DEC: FENCE ($fenced_site_name == $local_site_name)" + else + /usr/bin/logger -t "$logger_tag" "DEC: NO FENCE ($fenced_site_name != $local_site_name)" + fi + else + /usr/bin/logger -t "$logger_tag" "DEC: NO FENCE (no cache)" + fi +} + case "$crm_alert_kind" in node|nodes) msg="Node '${crm_alert_node}' is now '${crm_alert_desc}'" @@ -57,6 +76,11 @@ case "$crm_alert_kind" in /usr/bin/logger -t "$logger_tag" "INFO: $msg" process_fencing ;; + check) + msg="Checking for '${crm_alert_node}': ${crm_alert_desc}" + /usr/bin/logger -t "$logger_tag" "INFO: $msg" + check_fencing + ;; *) msg="Unhandled '$crm_alert_kind' alert (${crm_alert_desc})" /usr/bin/logger -t "$logger_tag" "INFO: $msg" From 58e1788b0cff19228f46da692eab8410101549e9 Mon Sep 17 00:00:00 2001 From: lpinne Date: Thu, 13 Jun 2024 13:57:29 +0200 Subject: [PATCH 28/84] SAPHanaSR-alert.8: examples --- man/SAPHanaSR-alert.8 | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/man/SAPHanaSR-alert.8 b/man/SAPHanaSR-alert.8 index 480f481a..86495345 100644 --- a/man/SAPHanaSR-alert.8 +++ b/man/SAPHanaSR-alert.8 @@ -71,18 +71,37 @@ Alternate way for configuring the alert agent. # crm configure alert fencing-1 "/usr/bin/SAPHanaSR-alert" select fencing .RE .PP -\fB*\fR Example for showing all configured alert agents. +\fB*\fR Showing all configured alert agents. .PP .RS 2 # crm configure show | grep -A1 alert .RE .PP -\fB*\fR Example for showing agent messages. +\fB*\fR Showing agent messages. .PP .RS 2 # grep SAPHanaSR-alert /var/log/messages .RE .PP +\fB*\fR Showing history of fence actions. +.PP +.RS 2 +# crm_mon -1 --include=none,fencing +.RE +.PP +\fB*\fR Example for manually fencing an node. +.PP +This could be done for testing the SAPHanaSR-alert agent integration. +This test should not be done on production systems. +See manual page crm(8) for details. +Fenced node is node1. +.br +Note: Understand the impact before trying. +.PP +.RS 2 +# crm node fence node1 +.RE +.PP .\" .SH FILES .TP From a2db115f9b20bb4fc26233763ddaff4602c56246 Mon Sep 17 00:00:00 2001 From: lpinne Date: Thu, 13 Jun 2024 17:11:34 +0200 Subject: [PATCH 29/84] SAPHanaSR-alert: crm vs. stonith_admin? 
--- alert/SAPHanaSR-alert | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/alert/SAPHanaSR-alert b/alert/SAPHanaSR-alert index 4ecc4e6d..b3e7b82b 100755 --- a/alert/SAPHanaSR-alert +++ b/alert/SAPHanaSR-alert @@ -5,7 +5,7 @@ # Support: linux@sap.com # License: GNU General Public License (GPL) # Copyright: (c) 2024 SUSE LLC -# 2024-06-12-15:13 +# 2024-06-13-17:10 # # crm configure alert nodes-1 "/usr/bin/SAPHanaSR-alert" select nodes # crm configure alert fencing-1 "/usr/bin/SAPHanaSR-alert" select fencing @@ -34,7 +34,9 @@ function process_fencing() if [[ "$local_site_name" != "" && "$fenced_site_name" == "$local_site_name" ]]; then /usr/bin/logger -t "$logger_tag" "DEC: FENCE ($fenced_site_name == $local_site_name)" sleep 10 - /usr/sbin/crm --force node fence "${HOSTNAME}"; rc="$?" + # TODO PRIO1: crm vs. stonith_admin + # /usr/sbin/crm --force node fence "${HOSTNAME}"; rc="$?" + /usr/sbin/stonith_admin --fence="${HOSTNAME}" --tolerance=120; rc="$?" if [[ "$rc" != "0" ]]; then /usr/bin/logger -t "$logger_tag" "ACT: /usr/sbin/crm --force node fence ${HOSTNAME}; rc=$rc" fi From b8679702a20440bd91ad1e828070a589c63b12fc Mon Sep 17 00:00:00 2001 From: lpinne Date: Fri, 14 Jun 2024 09:19:06 +0200 Subject: [PATCH 30/84] SAPHanaSR-alert.8: requirements --- man/SAPHanaSR-alert.8 | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/man/SAPHanaSR-alert.8 b/man/SAPHanaSR-alert.8 index 86495345..7bb2d056 100644 --- a/man/SAPHanaSR-alert.8 +++ b/man/SAPHanaSR-alert.8 @@ -117,14 +117,16 @@ the internal cache for host to site relation - do not touch this file .PP 2. SAP HANA scale-out performance-optimized scenario. .PP -3. Only one SID controlled by the Linux cluster. +3. Only one SID is controlled by the Linux cluster. .PP -4. No other alert agent should be configured for the fencing alert. +4. Site names and host names should not be changed. .PP -5. Automatic re-start on just fenced nodes should be disabled. See manual page +5. No other alert agent should be configured for the fencing alert. +.PP +6. Automatic re-start on just fenced nodes should be disabled. See manual page sbd(8) for details. .PP -6. The alert agent runtime almost completely depends on call-outs to OS and +7. The alert agent runtime almost completely depends on call-outs to OS and Linux cluster. 
.\" .SH BUGS From a2d8a658834bbf20be40d2738bee4587551101ca Mon Sep 17 00:00:00 2001 From: lpinne Date: Fri, 14 Jun 2024 09:27:49 +0200 Subject: [PATCH 31/84] SAPHanaSR-alert: logger="/usr/bin/logger" --- alert/SAPHanaSR-alert | 44 ++++++++++++++++++++++--------------------- 1 file changed, 23 insertions(+), 21 deletions(-) diff --git a/alert/SAPHanaSR-alert b/alert/SAPHanaSR-alert index b3e7b82b..9d31775d 100755 --- a/alert/SAPHanaSR-alert +++ b/alert/SAPHanaSR-alert @@ -5,20 +5,22 @@ # Support: linux@sap.com # License: GNU General Public License (GPL) # Copyright: (c) 2024 SUSE LLC -# 2024-06-13-17:10 +# Version: 2024-06-13-17:10 # # crm configure alert nodes-1 "/usr/bin/SAPHanaSR-alert" select nodes # crm configure alert fencing-1 "/usr/bin/SAPHanaSR-alert" select fencing # logger_tag="SAPHanaSR-alert" +logger="/usr/bin/logger" # ON_FAIL_ACTION="${OCF_RESKEY_ON_FAIL_ACTION:-proceed}" CRM_alert_recipient="${CRM_alert_recipient:-/dev/null}" crm_alert_kind="${CRM_alert_kind:-manual call}" crm_alert_node="${CRM_alert_node:-$HOSTNAME}" crm_alert_desc="${CRM_alert_desc:-no description provided}" -/usr/bin/logger -t "$logger_tag" "AH: begin event '$crm_alert_kind'" + +$logger -t "$logger_tag" "AH: begin event '$crm_alert_kind'" cache_file="/run/crm/SAPHanaSR_site_cache" function process_fencing() @@ -29,22 +31,22 @@ function process_fencing() if [[ -e "$cache_file" ]]; then fenced_site_name=$(awk -F: '$1 == host { print $2 }' host="${crm_alert_node}" "$cache_file") local_site_name=$(awk -F: '$1 == host { print $2 }' host="${HOSTNAME}" "$cache_file") - /usr/bin/logger -t "$logger_tag" "INFO: cache_file=$cache_file, crm_alert_node=$crm_alert_node" - /usr/bin/logger -t "$logger_tag" "INFO: fenced_site_name=$fenced_site_name, local_site_name=$local_site_name" + $logger -t "$logger_tag" "INFO: cache_file=$cache_file, crm_alert_node=$crm_alert_node" + $logger -t "$logger_tag" "INFO: fenced_site_name=$fenced_site_name, local_site_name=$local_site_name" if [[ "$local_site_name" != "" && "$fenced_site_name" == "$local_site_name" ]]; then - /usr/bin/logger -t "$logger_tag" "DEC: FENCE ($fenced_site_name == $local_site_name)" - sleep 10 + $logger -t "$logger_tag" "DEC: FENCE ($fenced_site_name == $local_site_name)" + sleep 10 # TODO PRIO1: crm vs. stonith_admin # /usr/sbin/crm --force node fence "${HOSTNAME}"; rc="$?" /usr/sbin/stonith_admin --fence="${HOSTNAME}" --tolerance=120; rc="$?" 
if [[ "$rc" != "0" ]]; then - /usr/bin/logger -t "$logger_tag" "ACT: /usr/sbin/crm --force node fence ${HOSTNAME}; rc=$rc" + $logger -t "$logger_tag" "ACT: /usr/sbin/crm --force node fence ${HOSTNAME}; rc=$rc" fi else - /usr/bin/logger -t "$logger_tag" "DEC: NO FENCE ($fenced_site_name != $local_site_name)" + $logger -t "$logger_tag" "DEC: NO FENCE ($fenced_site_name != $local_site_name)" fi else - /usr/bin/logger -t "$logger_tag" "DEC: NO FENCE (no cache)" + $logger -t "$logger_tag" "DEC: NO FENCE (no cache)" fi } @@ -56,37 +58,37 @@ function check_fencing() if [[ -e "$cache_file" ]]; then fenced_site_name=$(awk -F: '$1 == host { print $2 }' host="${crm_alert_node}" "$cache_file") local_site_name=$(awk -F: '$1 == host { print $2 }' host="${HOSTNAME}" "$cache_file") - /usr/bin/logger -t "$logger_tag" "INFO: cache_file=$cache_file" - /usr/bin/logger -t "$logger_tag" "INFO: fenced_site_name=$fenced_site_name, local_site_name=$local_site_name" + $logger -t "$logger_tag" "INFO: cache_file=$cache_file" + $logger -t "$logger_tag" "INFO: fenced_site_name=$fenced_site_name, local_site_name=$local_site_name" if [[ "$local_site_name" != "" && "$fenced_site_name" == "$local_site_name" ]]; then - /usr/bin/logger -t "$logger_tag" "DEC: FENCE ($fenced_site_name == $local_site_name)" + $logger -t "$logger_tag" "DEC: FENCE ($fenced_site_name == $local_site_name)" else - /usr/bin/logger -t "$logger_tag" "DEC: NO FENCE ($fenced_site_name != $local_site_name)" + $logger -t "$logger_tag" "DEC: NO FENCE ($fenced_site_name != $local_site_name)" fi else - /usr/bin/logger -t "$logger_tag" "DEC: NO FENCE (no cache)" + $logger -t "$logger_tag" "DEC: NO FENCE (no cache)" fi } case "$crm_alert_kind" in node|nodes) - msg="Node '${crm_alert_node}' is now '${crm_alert_desc}'" - /usr/bin/logger -t "$logger_tag" "INFO: $msg" + msg="Node '${crm_alert_node}' is now '${crm_alert_desc}'" + $logger -t "$logger_tag" "INFO: $msg" ;; fencing) msg="Fencing for '${crm_alert_node}': ${crm_alert_desc}" - /usr/bin/logger -t "$logger_tag" "INFO: $msg" + $logger -t "$logger_tag" "INFO: $msg" process_fencing ;; check) msg="Checking for '${crm_alert_node}': ${crm_alert_desc}" - /usr/bin/logger -t "$logger_tag" "INFO: $msg" + $logger -t "$logger_tag" "INFO: $msg" check_fencing ;; *) - msg="Unhandled '$crm_alert_kind' alert (${crm_alert_desc})" - /usr/bin/logger -t "$logger_tag" "INFO: $msg" + msg="Unhandled '$crm_alert_kind' alert (${crm_alert_desc})" + $logger -t "$logger_tag" "INFO: $msg" ;; esac -/usr/bin/logger -t "$logger_tag" "AH: end event '$crm_alert_kind'" +$logger -t "$logger_tag" "AH: end event '$crm_alert_kind'" # From 6f757443614dccde0b9702a16055c55411470396 Mon Sep 17 00:00:00 2001 From: lpinne Date: Fri, 14 Jun 2024 13:41:22 +0200 Subject: [PATCH 32/84] SAPHanaSR-alert: back to crm node fence --- alert/SAPHanaSR-alert | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/alert/SAPHanaSR-alert b/alert/SAPHanaSR-alert index 9d31775d..1b160a63 100755 --- a/alert/SAPHanaSR-alert +++ b/alert/SAPHanaSR-alert @@ -37,8 +37,8 @@ function process_fencing() $logger -t "$logger_tag" "DEC: FENCE ($fenced_site_name == $local_site_name)" sleep 10 # TODO PRIO1: crm vs. stonith_admin - # /usr/sbin/crm --force node fence "${HOSTNAME}"; rc="$?" - /usr/sbin/stonith_admin --fence="${HOSTNAME}" --tolerance=120; rc="$?" + /usr/sbin/crm --force node fence "${HOSTNAME}"; rc="$?" + # /usr/sbin/stonith_admin --fence="${HOSTNAME}" --tolerance=120; rc="$?" 
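
The TODO PRIO1 above weighs two ways for the agent to fence its own node. Both calls appear in these patches; the note on --tolerance is summarized from stonith_admin(8), not from this series:

    # via the crm shell, the variant the series settles on in patch 32:
    /usr/sbin/crm --force node fence "${HOSTNAME}"

    # via the fencer CLI; --tolerance=120 treats a fencing action that already
    # succeeded within the last 120 seconds as done, which would suppress a
    # duplicate self-fencing request:
    /usr/sbin/stonith_admin --fence="${HOSTNAME}" --tolerance=120
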
if [[ "$rc" != "0" ]]; then
                 $logger -t "$logger_tag" "ACT: /usr/sbin/crm --force node fence ${HOSTNAME}; rc=$rc"
             fi

From fca9b97d3be61d6cc98fbba8b178eb9241376cf1 Mon Sep 17 00:00:00 2001
From: lpinne
Date: Fri, 14 Jun 2024 13:47:30 +0200
Subject: [PATCH 33/84] SAPHanaSR-alert.8: requirements

---
 man/SAPHanaSR-alert.8 | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/man/SAPHanaSR-alert.8 b/man/SAPHanaSR-alert.8
index 7bb2d056..2e3b5d0b 100644
--- a/man/SAPHanaSR-alert.8
+++ b/man/SAPHanaSR-alert.8
@@ -113,7 +113,7 @@ the internal cache for host to site relation - do not touch this file
 .PP
 .\"
 .SH REQUIREMENTS
-1. Pacemaker 2.1.6 or newer.
+1. Pacemaker 2.1.2 or newer.
 .PP
 2. SAP HANA scale-out performance-optimized scenario.
 .PP

From c0aa31e48625cd1713b0e107144651ff5b289fd6 Mon Sep 17 00:00:00 2001
From: Fabian Herschel
Date: Tue, 18 Jun 2024 15:13:17 +0200
Subject: [PATCH 34/84] angi: alert/SAPHanaSR-alert - fencing delay for alert
 script (first 300s hardcoded)

---
 alert/SAPHanaSR-alert | 46 ++++++++++++++++++++++++++-----------------
 1 file changed, 28 insertions(+), 18 deletions(-)

diff --git a/alert/SAPHanaSR-alert b/alert/SAPHanaSR-alert
index 4ecc4e6d..21204187 100755
--- a/alert/SAPHanaSR-alert
+++ b/alert/SAPHanaSR-alert
@@ -21,29 +21,39 @@ crm_alert_desc="${CRM_alert_desc:-no description provided}"
 /usr/bin/logger -t "$logger_tag" "AH: begin event '$crm_alert_kind'"
 cache_file="/run/crm/SAPHanaSR_site_cache"
+crm_alert_fencing_delay=300
+
+IFS=. read -r sys_uptime REST </proc/uptime
[...]

Date: Tue, 18 Jun 2024 15:19:28 +0200
Subject: [PATCH 35/84] angi: alert/SAPHanaSR-alert - reimplement Lars' change
 to use variable to call logger

---
 alert/SAPHanaSR-alert | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/alert/SAPHanaSR-alert b/alert/SAPHanaSR-alert
index 1884f227..5a032a95 100755
--- a/alert/SAPHanaSR-alert
+++ b/alert/SAPHanaSR-alert
@@ -38,23 +38,23 @@ function process_fencing()
     if [[ -e "$cache_file" ]]; then
         fenced_site_name=$(awk -F: '$1 == host { print $2 }' host="${crm_alert_node}" "$cache_file")
         local_site_name=$(awk -F: '$1 == host { print $2 }' host="${HOSTNAME}" "$cache_file")
-        /usr/bin/logger -t "$logger_tag" "INFO: cache_file=$cache_file, crm_alert_node=$crm_alert_node"
-        /usr/bin/logger -t "$logger_tag" "INFO: fenced_site_name=$fenced_site_name, local_site_name=$local_site_name"
+        $logger -t "$logger_tag" "INFO: cache_file=$cache_file, crm_alert_node=$crm_alert_node"
+        $logger -t "$logger_tag" "INFO: fenced_site_name=$fenced_site_name, local_site_name=$local_site_name"
         if [[ "$local_site_name" != "" && "$fenced_site_name" == "$local_site_name" ]]; then
-            /usr/bin/logger -t "$logger_tag" "DEC: FENCE ($fenced_site_name == $local_site_name)"
+            $logger -t "$logger_tag" "DEC: FENCE ($fenced_site_name == $local_site_name)"
             sleep 10
             /usr/sbin/crm --force node fence "${HOSTNAME}"; rc="$?"
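
Most of patch 34's hunk was lost in extraction, but the surviving fragments (the uptime read above, and the "NO FENCE (uptime < crm_alert_fencing_delay)" message that patch 35 touches below) indicate a guard of roughly this shape; the if/else framing is an assumption, not the verbatim patch:

    # the first field of /proc/uptime is "seconds.centiseconds"; splitting on "."
    # leaves the whole seconds in sys_uptime
    IFS=. read -r sys_uptime REST </proc/uptime
    crm_alert_fencing_delay=300
    if (( sys_uptime > crm_alert_fencing_delay )); then
        : # node is up long enough, fencing alerts may be processed
    else
        echo "DEC: NO FENCE (uptime < crm_alert_fencing_delay)"
    fi
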
if [[ "$rc" != "0" ]]; then - /usr/bin/logger -t "$logger_tag" "ACT: /usr/sbin/crm --force node fence ${HOSTNAME}; rc=$rc" + $logger -t "$logger_tag" "ACT: /usr/sbin/crm --force node fence ${HOSTNAME}; rc=$rc" fi else - /usr/bin/logger -t "$logger_tag" "DEC: NO FENCE ($fenced_site_name != $local_site_name)" + $logger -t "$logger_tag" "DEC: NO FENCE ($fenced_site_name != $local_site_name)" fi else - /usr/bin/logger -t "$logger_tag" "DEC: NO FENCE (no cache)" + $logger -t "$logger_tag" "DEC: NO FENCE (no cache)" fi else - /usr/bin/logger -t "$logger_tag" "DEC: NO FENCE (uptime < crm_alert_fencing_delay)" + $logger -t "$logger_tag" "DEC: NO FENCE (uptime < crm_alert_fencing_delay)" fi } From 6f4aa4fa6ac8039160fefe4519bbdbbf9b9bb9f1 Mon Sep 17 00:00:00 2001 From: lpinne Date: Tue, 18 Jun 2024 15:20:37 +0200 Subject: [PATCH 36/84] SAPHanaSR-alert.8: example --- man/SAPHanaSR-alert.8 | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/man/SAPHanaSR-alert.8 b/man/SAPHanaSR-alert.8 index 2e3b5d0b..e7d2ccc6 100644 --- a/man/SAPHanaSR-alert.8 +++ b/man/SAPHanaSR-alert.8 @@ -83,10 +83,14 @@ Alternate way for configuring the alert agent. # grep SAPHanaSR-alert /var/log/messages .RE .PP -\fB*\fR Showing history of fence actions. +\fB*\fR Showing history of fence actions and cleaning it up. +.PP +Example node with failed fencing action is node22. .PP .RS 2 # crm_mon -1 --include=none,fencing +.br +# stonith_admin --cleanup --history node22 .RE .PP \fB*\fR Example for manually fencing an node. From bd1dbf22dd0c735f8212fd4a17ed4c7524e97093 Mon Sep 17 00:00:00 2001 From: Fabian Herschel Date: Tue, 18 Jun 2024 15:24:28 +0200 Subject: [PATCH 37/84] angi: renaming SAPHanaSR-alert --- alert/{SAPHanaSR-alert => SAPHanaSR-alert-fencing} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename alert/{SAPHanaSR-alert => SAPHanaSR-alert-fencing} (100%) diff --git a/alert/SAPHanaSR-alert b/alert/SAPHanaSR-alert-fencing similarity index 100% rename from alert/SAPHanaSR-alert rename to alert/SAPHanaSR-alert-fencing From e952f75a704906608f622c68ba25f280f51f61f4 Mon Sep 17 00:00:00 2001 From: Fabian Herschel Date: Tue, 18 Jun 2024 15:26:28 +0200 Subject: [PATCH 38/84] angi: SAPHanaSR-angi.spec - renamed SAPHanaSR-alert also in package --- SAPHanaSR-angi.spec | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/SAPHanaSR-angi.spec b/SAPHanaSR-angi.spec index 4066aa2d..65943472 100644 --- a/SAPHanaSR-angi.spec +++ b/SAPHanaSR-angi.spec @@ -97,7 +97,7 @@ install -m 0644 srHook/susChkSrv.py %{buildroot}/usr/share/%{name}/ install -m 0444 srHook/global.ini_* %{buildroot}/usr/share/%{name}/samples # alert manager -install -m 0755 alert/SAPHanaSR-alert %{buildroot}/usr/bin +install -m 0755 alert/SAPHanaSR-alert-fencing %{buildroot}/usr/bin # crm config templates install -m 0644 crm_cfg/angi-ScaleUp/[0-9]*_* %{buildroot}/usr/share/%{name}/samples/crm_cfg/angi-ScaleUp @@ -141,7 +141,7 @@ install -m 0444 tools/saphana_sr_tools.py %{buildroot}/usr/lib/%{name} /usr/bin/SAPHanaSR-filter-legacy /usr/bin/SAPHanaSR-hookHelper /usr/bin/SAPHanaSR-manageProvider -/usr/bin/SAPHanaSR-alert +/usr/bin/SAPHanaSR-alert-fencing %license LICENSE %dir %{_docdir}/%{name} From 5ba2cf209a320ddc4fa1dc6842175ddd24d5afd2 Mon Sep 17 00:00:00 2001 From: Fabian Herschel Date: Tue, 18 Jun 2024 15:35:00 +0200 Subject: [PATCH 39/84] angi: SAPHanaSR-alert-fencing - updated logger tag --- alert/SAPHanaSR-alert-fencing | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/alert/SAPHanaSR-alert-fencing 
b/alert/SAPHanaSR-alert-fencing index 5a032a95..da938aec 100755 --- a/alert/SAPHanaSR-alert-fencing +++ b/alert/SAPHanaSR-alert-fencing @@ -5,13 +5,13 @@ # Support: linux@sap.com # License: GNU General Public License (GPL) # Copyright: (c) 2024 SUSE LLC -# Version: 2024-06-13-17:10 +# Version: 2024-06-18-15:33 # # crm configure alert nodes-1 "/usr/bin/SAPHanaSR-alert" select nodes # crm configure alert fencing-1 "/usr/bin/SAPHanaSR-alert" select fencing # -logger_tag="SAPHanaSR-alert" +logger_tag="SAPHanaSR-alert-fencing" logger="/usr/bin/logger" # ON_FAIL_ACTION="${OCF_RESKEY_ON_FAIL_ACTION:-proceed}" From 329da7b135582e78dc16e908bbc8226678687daf Mon Sep 17 00:00:00 2001 From: lpinne Date: Tue, 18 Jun 2024 15:41:16 +0200 Subject: [PATCH 40/84] SAPHanaSR-alert.8 SAPHanaSR-alert-fencing.8: renamed --- ...naSR-alert.8 => SAPHanaSR-alert-fencing.8} | 29 +++++++++---------- 1 file changed, 14 insertions(+), 15 deletions(-) rename man/{SAPHanaSR-alert.8 => SAPHanaSR-alert-fencing.8} (82%) diff --git a/man/SAPHanaSR-alert.8 b/man/SAPHanaSR-alert-fencing.8 similarity index 82% rename from man/SAPHanaSR-alert.8 rename to man/SAPHanaSR-alert-fencing.8 index e7d2ccc6..9ad1570b 100644 --- a/man/SAPHanaSR-alert.8 +++ b/man/SAPHanaSR-alert-fencing.8 @@ -1,14 +1,13 @@ .\" Version: 1.001 .\" -.TH SAPHanaSR-alert 7 "12 Jun 2024" "" "SAPHanaSR" -.\" TODO SAPHanaSR-alert-fencing ? +.TH SAPHanaSR-alert-fencing 7 "12 Jun 2024" "" "SAPHanaSR" .\" .SH NAME -SAPHanaSR-alert \- Alert agent for cluster fencing alerts. +SAPHanaSR-alert-fencing \- Alert agent for cluster fencing alerts. .PP .\" .SH DESCRIPTION -SAPHanaSR-alert can be used to react on Linux cluster fencing alerts. +SAPHanaSR-alert-fencing can be used to react on Linux cluster fencing alerts. .PP The Linux cluster provides an interface to take external action when a cluster event occurs (alert). Than the cluster calls an external program (an alert agent) @@ -33,10 +32,10 @@ and susChkSrv.py(7). .PP .\" .SH SUPPORTED PARAMETERS -.TP -\fBtimeout\fR -If the alert agent does not complete within this amount of time, it will be terminated. Optional, default "30s". Example "meta timeout=30s". -.TP +.\" .TP +.\" \fBtimeout\fR +.\" If the alert agent does not complete within this amount of time, it will be terminated. Optional, default "30s". Example "meta timeout=30s". +.\" .TP \fBenabled\fR If false for an alert, the alert will not be used. If true for an alert and false for a particular recipient of that alert, that recipient will not be used. Optional, default "true". .\" @@ -50,7 +49,7 @@ Successful program execution. Usage, syntax or execution errors. .br In addition log entries are written, which can be scanned by using a pattern -like "SAPHanaSR-alert". +like "SAPHanaSR-alert-fencing". .PP .\" .SH EXAMPLES @@ -60,7 +59,7 @@ like "SAPHanaSR-alert". The following line needs to be added to the cluster´s CIB: .PP .RS 2 -alert fencing-1 "/usr/bin/SAPHanaSR-alert" select fencing +alert fencing-1 "/usr/bin/SAPHanaSR-alert-fencing" select fencing .RE .PP \fB*\fR Example for configuring the alert agent by using crm. @@ -68,7 +67,7 @@ alert fencing-1 "/usr/bin/SAPHanaSR-alert" select fencing Alternate way for configuring the alert agent. .PP .RS 2 -# crm configure alert fencing-1 "/usr/bin/SAPHanaSR-alert" select fencing +# crm configure alert fencing-1 "/usr/bin/SAPHanaSR-alert-fencing" select fencing .RE .PP \fB*\fR Showing all configured alert agents. @@ -80,7 +79,7 @@ Alternate way for configuring the alert agent. \fB*\fR Showing agent messages. 
.PP
 .RS 2
-# grep SAPHanaSR-alert /var/log/messages
+# grep SAPHanaSR-alert-fencing /var/log/messages
 .RE
 .PP
 \fB*\fR Showing history of fence actions and cleaning it up.
@@ -95,7 +94,7 @@ Example node with failed fencing action is node22.
 .PP
 \fB*\fR Example for manually fencing an node.
 .PP
-This could be done for testing the SAPHanaSR-alert agent integration.
+This could be done for testing the SAPHanaSR-alert-fencing agent integration.
 This test should not be done on production systems.
 See manual page crm(8) for details.
 Fenced node is node1.
@@ -109,7 +108,7 @@ Note: Understand the impact before trying.
 .\"
 .SH FILES
 .TP
-/usr/bin/SAPHanaSR-alert
+/usr/bin/SAPHanaSR-alert-fencing
 the alert agent
 .TP
 /run/crm/SAPHanaSR_site_cache
@@ -155,7 +154,7 @@ F.Herschel, L.Pinne.
 .br
 (c) 2024 SUSE LLC
 .br
-SAPHanaSR-alert comes with ABSOLUTELY NO WARRANTY.
+SAPHanaSR-alert-fencing comes with ABSOLUTELY NO WARRANTY.
 .br
 For details see the GNU General Public License at
 http://www.gnu.org/licenses/gpl.html

From e97a26e4b90e67f9a41503e8ae0a0b197d7a91e2 Mon Sep 17 00:00:00 2001
From: Fabian Herschel
Date: Tue, 18 Jun 2024 16:52:50 +0200
Subject: [PATCH 41/84] angi: SAPHanaSR-alert-fencing - allow to set
 alert_fencing_delay

---
 alert/SAPHanaSR-alert-fencing | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/alert/SAPHanaSR-alert-fencing b/alert/SAPHanaSR-alert-fencing
index da938aec..9cb4a128 100755
--- a/alert/SAPHanaSR-alert-fencing
+++ b/alert/SAPHanaSR-alert-fencing
@@ -8,7 +8,7 @@
 # Version:      2024-06-18-15:33
 #
 # crm configure alert nodes-1 "/usr/bin/SAPHanaSR-alert" select nodes
-# crm configure alert fencing-1 "/usr/bin/SAPHanaSR-alert" select fencing
+# crm configure alert fencing-1 "/usr/bin/SAPHanaSR-alert" select fencing attributes alert_fencing_delay=300
 #
 logger_tag="SAPHanaSR-alert-fencing"
@@ -23,10 +23,10 @@ crm_alert_desc="${CRM_alert_desc:-no description provided}"
 $logger -t "$logger_tag" "AH: begin event '$crm_alert_kind'"
 cache_file="/run/crm/SAPHanaSR_site_cache"
-crm_alert_fencing_delay=300
+alert_fencing_delay="${alert_fencing_delay:-300}"
 
 IFS=. read -r sys_uptime REST </proc/uptime
[...]

Date: Tue, 18 Jun 2024 17:01:46 +0200
Subject: [PATCH 42/84] SAPHanaSR-alert-fencing.8: alert_fencing_delay

---
 man/SAPHanaSR-alert-fencing.8 | 21 ++++++++++++-------
 1 file changed, 14 insertions(+), 7 deletions(-)

diff --git a/man/SAPHanaSR-alert-fencing.8 b/man/SAPHanaSR-alert-fencing.8
index 9ad1570b..aecceb63 100644
--- a/man/SAPHanaSR-alert-fencing.8
+++ b/man/SAPHanaSR-alert-fencing.8
@@ -32,12 +32,15 @@ and susChkSrv.py(7).
 .PP
 .\"
 .SH SUPPORTED PARAMETERS
+.TP
+\fBtimeout\fR
+If the alert agent does not complete within this amount of time, it will be terminated. Optional, default "30s". Example "meta timeout=30s".
 .\" .TP
-.\" \fBtimeout\fR
-.\" If the alert agent does not complete within this amount of time, it will be terminated. Optional, default "30s". Example "meta timeout=30s".
-.\" .TP
-\fBenabled\fR
-If false for an alert, the alert will not be used. If true for an alert and false for a particular recipient of that alert, that recipient will not be used. Optional, default "true".
+.\" \fBenabled\fR
+.\" If false for an alert, the alert will not be used. If true for an alert and false for a particular recipient of that alert, that recipient will not be used. Optional, default "true".
+.TP
+\fBalert_fencing_delay\fR
+How long a node must be up and running (uptime) before fencing alerts will be processed. This avoids fencing loops. Optional, default "300".
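
Pacemaker hands configured alert attributes and the CRM_alert_* metadata to the agent as environment variables, which is why patch 41 above can default with ${alert_fencing_delay:-300}. A hypothetical manual exercise of the agent outside a cluster; the node name and values are invented, and on a live node with a matching site cache this would request real fencing:

    # simulate a fencing alert for host node01 with a lowered threshold;
    # the CRM_alert_* variables mimic what Pacemaker would export
    CRM_alert_kind="fencing" \
    CRM_alert_node="node01" \
    CRM_alert_desc="manual test" \
    alert_fencing_delay=60 \
        /usr/bin/SAPHanaSR-alert-fencing
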
.\" .PP .\" @@ -56,10 +59,14 @@ like "SAPHanaSR-alert-fencing". .PP \fB*\fR Example configuration for the fencing alert handler. .PP -The following line needs to be added to the cluster´s CIB: +The following lines needs to be added to the cluster´s CIB: .PP .RS 2 -alert fencing-1 "/usr/bin/SAPHanaSR-alert-fencing" select fencing +alert fencing-1 "/usr/bin/SAPHanaSR-alert-fencing" \\ +.br +select fencing \\ +.br +attributes alert_fencing_delay=300 .RE .PP \fB*\fR Example for configuring the alert agent by using crm. From b9cf87cdd1114473fbe0001cfae3e0256227aac6 Mon Sep 17 00:00:00 2001 From: Fabian Herschel Date: Wed, 19 Jun 2024 14:26:29 +0200 Subject: [PATCH 43/84] angi: ra+tools: new formats csv and cache, new selection sitelist --- ra/saphana-filesystem-lib | 5 ++++- tools/SAPHanaSR-showAttr | 7 +++++++ tools/saphana_sr_tools.py | 29 +++++++++++++++++++++++++++++ 3 files changed, 40 insertions(+), 1 deletion(-) diff --git a/ra/saphana-filesystem-lib b/ra/saphana-filesystem-lib index 861a0112..cc32ed74 100755 --- a/ra/saphana-filesystem-lib +++ b/ra/saphana-filesystem-lib @@ -355,7 +355,10 @@ function shfs_monitor() { esac fi - ( SAPHanaSR-showAttr --format=tester | grep site= | sed -e 's/Host\///' -e 's/\/site=/:/' -e 's/"//g' > /run/crm/SAPHanaSR_site_cache; chmod 644 /run/crm/SAPHanaSR_site_cache )& + # TODO PRIO2: Param to switch writing cache file on/off needed? + # TODO PRIO2: Writing cache file only for scale-out? + # DONE PRIO1: Remove grep and sed to adjust output + ( SAPHanaSR-showAttr --format=cache --select=sitelist > /run/crm/SAPHanaSR_site_cache; chmod 644 /run/crm/SAPHanaSR_site_cache )& super_ocf_log info "FLOW ${FUNCNAME[0]} rc=$rc" return "$rc" } # end function shfs_monitor diff --git a/tools/SAPHanaSR-showAttr b/tools/SAPHanaSR-showAttr index 793ff7e1..f8530281 100755 --- a/tools/SAPHanaSR-showAttr +++ b/tools/SAPHanaSR-showAttr @@ -153,3 +153,10 @@ if __name__ == "__main__": myHana.print_dic_as_path(myHana.res_dict, "resource", "Resource", quote='"') myHana.print_dic_as_path(myHana.site_dict, "site", "Site", quote='"') myHana.print_dic_as_path(myHana.host_dict, "host", "Host", quote='"') + elif oformat in {"cache"}: + myHana.print_dic_as_csv(myHana.host_dict, "host", "Host", quote='', short=True) + elif oformat in {"csv"}: + myHana.print_dic_as_csv(myHana.glob_dict, "global", "Global", quote='') + myHana.print_dic_as_csv(myHana.res_dict, "resource", "Resource", quote='') + myHana.print_dic_as_csv(myHana.site_dict, "site", "Site", quote='') + myHana.print_dic_as_csv(myHana.host_dict, "host", "Host", quote='') diff --git a/tools/saphana_sr_tools.py b/tools/saphana_sr_tools.py index e83eaf83..f733cadc 100644 --- a/tools/saphana_sr_tools.py +++ b/tools/saphana_sr_tools.py @@ -131,6 +131,12 @@ class HanaCluster(): 'site': ['Site', 'lpt', 'lss', 'mns', 'opMode', 'srHook', 'srMode', 'srPoll', 'srr'], 'host': ['Host', 'clone_state', 'node_state', 'roles', 'score', 'site', 'sra', 'srah', 'standby', 'vhost', 'fail.*'], }, + 'sitelist': { + 'global': [], + 'resource': [], + 'site': [], + 'host': ['site'], + }, } def __init__(self): @@ -499,6 +505,29 @@ def print_dic_as_path(self, print_dic, area, table_name, **kargs): value = print_dic[key][col] print(f"{time_string}{table_name}/{key}/{col}={quote}{value}{quote}") + def print_dic_as_csv(self, print_dic, area, table_name, **kargs): + """ + TODO: description + """ + time_string = "" + quote = '' + short = False + if 'quote' in kargs: + quote = kargs['quote'] + if 'ts' in kargs: + time_string = f"{kargs['ts']} " + if 'short' in 
kargs: + short = kargs['short'] + for key in print_dic: + for col in print_dic[key]: + if self.filter(area, col) is True: + value = print_dic[key][col] + if short: + print(f"{key}:{quote}{value}{quote}") + else: + #print(f"{time_string}{table_name}/{key}/{col}={quote}{value}{quote}") + print(f"{table_name}:{key}:{col}:{quote}{value}{quote}") + def filter(self, area, column_name): ''' filter column_names False, if column should be skipped From 963c0617ffcdc3462bf4d566da2e76f505792566 Mon Sep 17 00:00:00 2001 From: Fabian Herschel Date: Wed, 19 Jun 2024 18:50:20 +0200 Subject: [PATCH 44/84] angi: some TODOs with low priority added --- ra/saphana-filesystem-lib | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ra/saphana-filesystem-lib b/ra/saphana-filesystem-lib index cc32ed74..4facbb05 100755 --- a/ra/saphana-filesystem-lib +++ b/ra/saphana-filesystem-lib @@ -357,6 +357,8 @@ function shfs_monitor() { # TODO PRIO2: Param to switch writing cache file on/off needed? # TODO PRIO2: Writing cache file only for scale-out? + # TODO PRIO3: Also caching info for primary-secondary role, if needed + # TODO PRIO3: Also caching info for system replication status (srHook) role, if needed # DONE PRIO1: Remove grep and sed to adjust output ( SAPHanaSR-showAttr --format=cache --select=sitelist > /run/crm/SAPHanaSR_site_cache; chmod 644 /run/crm/SAPHanaSR_site_cache )& super_ocf_log info "FLOW ${FUNCNAME[0]} rc=$rc" From 906b26a2694bf0aba578a0c99a702f5e96ea34bd Mon Sep 17 00:00:00 2001 From: lpinne Date: Thu, 20 Jun 2024 16:52:54 +0200 Subject: [PATCH 45/84] SAPHanaSR-alert-fencing.8: sbd start delay --- man/SAPHanaSR-alert-fencing.8 | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/man/SAPHanaSR-alert-fencing.8 b/man/SAPHanaSR-alert-fencing.8 index aecceb63..91f0be9a 100644 --- a/man/SAPHanaSR-alert-fencing.8 +++ b/man/SAPHanaSR-alert-fencing.8 @@ -40,7 +40,7 @@ If the alert agent does not complete within this amount of time, it will be term .\" If false for an alert, the alert will not be used. If true for an alert and false for a particular recipient of that alert, that recipient will not be used. Optional, default "true". .TP \fBalert_fencing_delay\fR -How long a node must be up and running (uptime) before fencing alerts will be processed. This avoids fencing loops. Optional, default "300". +How long a node must be up and running (uptime) before fencing alerts will be processed. This avoids fencing loops. Optional, default "300". Example "attributes alert_fencing_delay=300". .\" .PP .\" @@ -120,6 +120,9 @@ the alert agent .TP /run/crm/SAPHanaSR_site_cache the internal cache for host to site relation - do not touch this file +.TP +/etc/sysconfig/sbd +config file for SBD daemon .PP .\" .SH REQUIREMENTS @@ -133,8 +136,10 @@ the internal cache for host to site relation - do not touch this file .PP 5. No other alert agent should be configured for the fencing alert. .PP -6. Automatic re-start on just fenced nodes should be disabled. See manual page -sbd(8) for details. +6. Automatic restart of just fenced nodes should be disabled by adapting +SBD_START_MODE. In case of automatic restart of just fenced nodes, it might be +necessary to adapt SBD_START_DELAY in order to avoid fencing loops. See manual +page sbd(8). .PP 7. The alert agent runtime almost completely depends on call-outs to OS and Linux cluster. @@ -148,7 +153,7 @@ Please report any other feedback and suggestions to feedback@suse.com. 
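
Patch 45 above points to SBD_START_MODE and SBD_START_DELAY for breaking fencing loops. A sketch of the matching /etc/sysconfig/sbd fragment; note that sbd(8) spells the delay option SBD_DELAY_START, and the values are examples only:

    # do not rejoin the cluster automatically after an unclean exit (fencing):
    SBD_START_MODE=clean
    # delay the SBD start after boot to dampen potential fencing loops:
    SBD_DELAY_START=yes
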
.SH SEE ALSO \fBSAPHanaSR-angi\fP(7) , \fBSAPHanaSR-ScaleOut\fP(7) , \fBocf_suse_SAPHanaController\fP(7) , \fBocf_suse_SAPHanaFilesystem\fP(7) , -\fBsusChkSrv.py\fP (7) , \fBcrm\fP(8) , +\fBsusChkSrv.py\fP (7) , \fBcrm\fP(8) , \fBsbd\fP(8) , .br https://clusterlabs.org/pacemaker/doc/2.1/Pacemaker_Administration/singlehtml/#alert-agents .PP From c508831aaecc2431a4bc20a6480b756a4aa55353 Mon Sep 17 00:00:00 2001 From: lpinne Date: Fri, 21 Jun 2024 15:05:33 +0200 Subject: [PATCH 46/84] ocf_suse_SAPHana.7 ocf_suse_SAPHanaController.7: DUPLICATE...TIMESTAMP -> DUPLICATE...TIMEOUT, git issue 126 --- man/ocf_suse_SAPHana.7 | 8 ++++---- man/ocf_suse_SAPHanaController.7 | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/man/ocf_suse_SAPHana.7 b/man/ocf_suse_SAPHana.7 index bb003b63..f1af26b1 100644 --- a/man/ocf_suse_SAPHana.7 +++ b/man/ocf_suse_SAPHana.7 @@ -1,6 +1,6 @@ .\" Version: 0.160.1 .\" -.TH ocf_suse_SAPHana 7 "13 Dec 2023" "" "OCF resource agents" +.TH ocf_suse_SAPHana 7 "21 Jun 2024" "" "OCF resource agents" .\" .SH NAME SAPHana \- Manages takeover between two SAP HANA databases with system replication. @@ -145,10 +145,10 @@ Optional. Default value: false\&. .RS 4 Time difference needed between two primary time stamps (LPTs), in case a dual-primary situation occurs. If the difference between both node's -last primary time stamps is less than DUPLICATE_PRIMARY_TIMESTAMP, +last primary time stamps is less than DUPLICATE_PRIMARY_TIMEOUT, then the cluster holds one or both instances in a "WAITING" status. This is to give an admin the chance to react on a takeover. -Note: How the cluster proceeds after the DUPLICATE_PRIMARY_TIMESTAMP +Note: How the cluster proceeds after the DUPLICATE_PRIMARY_TIMEOUT has passed, depends on the parameter AUTOMATED_REGISTER. See also the examples section below. .br @@ -596,7 +596,7 @@ F.Herschel, L.Pinne. .br (c) 2015-2017 SUSE Linux GmbH, Germany. .br -(c) 2018-2023 SUSE LLC +(c) 2018-2024 SUSE LLC .br The resource agent SAPHana comes with ABSOLUTELY NO WARRANTY. .br diff --git a/man/ocf_suse_SAPHanaController.7 b/man/ocf_suse_SAPHanaController.7 index 9b577deb..16acca81 100644 --- a/man/ocf_suse_SAPHanaController.7 +++ b/man/ocf_suse_SAPHanaController.7 @@ -1,6 +1,6 @@ .\" Version: 1.001 .\" -.TH ocf_suse_SAPHanaController 7 "14 Mar 2024" "" "OCF resource agents" +.TH ocf_suse_SAPHanaController 7 "21 Jun 2024" "" "OCF resource agents" .\" .SH NAME SAPHanaController \- Manages takeover between two SAP HANA databases with system replication. @@ -159,8 +159,8 @@ Optional. Default value: false\&. .PP \fBDUPLICATE_PRIMARY_TIMEOUT\fR .RS 4 -Time difference needed between two primary time stamps (LPTs), in case a dual-primary situation occurs. If the difference between both node's last primary time stamps is less than DUPLICATE_PRIMARY_TIMESTAMP, then the cluster holds one or both instances in a "WAITING" status. This is to give an admin the chance to react on a failover. -Note: How the cluster proceeds after the DUPLICATE_PRIMARY_TIMESTAMP has passed, depends on the parameter AUTOMATED_REGISTER. See also the examples section below. +Time difference needed between two primary time stamps (LPTs), in case a dual-primary situation occurs. If the difference between both node's last primary time stamps is less than DUPLICATE_PRIMARY_TIMEOUT, then the cluster holds one or both instances in a "WAITING" status. This is to give an admin the chance to react on a failover. 
+Note: How the cluster proceeds after the DUPLICATE_PRIMARY_TIMEOUT has passed, depends on the parameter AUTOMATED_REGISTER. See also the examples section below. .br Optional. Default value: 7200\&. .RE From d1848e3220e2abcefebfde105d2add5a356493be Mon Sep 17 00:00:00 2001 From: lpinne Date: Fri, 21 Jun 2024 15:18:22 +0200 Subject: [PATCH 47/84] SAPHanaSR-showAttr.8: new options for alert agent --- man/SAPHanaSR-showAttr.8 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/man/SAPHanaSR-showAttr.8 b/man/SAPHanaSR-showAttr.8 index 6a5411ca..438db263 100644 --- a/man/SAPHanaSR-showAttr.8 +++ b/man/SAPHanaSR-showAttr.8 @@ -532,7 +532,7 @@ show version. .TP 4 \fB --select\fR \fISELECT\fR .\" TODO explain meaning of values -show selected information only. Allowed values: [ all | default | minimal | sr | cluster | cluster2 | cluster3 ]. Default is default. +show selected information only. Allowed values: [ all | default | minimal | sr | cluster | cluster2 | cluster3 | skitelist ]. Default is default. .TP 4 \fB --sid\fR \fISID\fR use SAP system ID \fISID\fR. Should be autodetected, if there is only one SAP HANA instance installed on the local cluster node. The SAP system ID is a 3 alphanum string with a valid SAP system name like SLE, HAE, FH1, C11, or P42. @@ -542,7 +542,7 @@ use SAP system ID \fISID\fR. Should be autodetected, if there is only one SAP HA sort Hosts section table by field. Allowed values: [ roles | site ]. Default is sort by hostnames. .TP 4 \fB --format\fR \fIFORMAT\fR -output format. Allowed values: [ script | tables | json | tester ]. Default is tables. +output format. Allowed values: [ script | tables | json | tester | csv | cache ]. Default is tables. .TP 4 \fB --cib\fR \fIOFFLINE_CIB_FILE\fR read data from given offline CIB file. From 9a8dda3d7916090665d171d8d40b23f3d521f448 Mon Sep 17 00:00:00 2001 From: lpinne Date: Fri, 21 Jun 2024 15:23:56 +0200 Subject: [PATCH 48/84] SAPHanaSR-alert-fencing.8: wording --- man/SAPHanaSR-alert-fencing.8 | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/man/SAPHanaSR-alert-fencing.8 b/man/SAPHanaSR-alert-fencing.8 index 91f0be9a..d8f19224 100644 --- a/man/SAPHanaSR-alert-fencing.8 +++ b/man/SAPHanaSR-alert-fencing.8 @@ -18,8 +18,7 @@ on each active cluster node. The agent checks whether the local node belongs to the same HANA site as the fenced node. If so, it asks the cluster to fence the local node as well. .PP -SAPHanaSR-agent is used to fence all nodes of an HANA site, in case one of -them gets fenced. This improves three use cases for HANA scale-out: +This improves three use cases for HANA scale-out: .br - HA/DR provider hook script susChkSrv.py action_on_lost=fence .br From 062dffacff645efb547ef1b5dcf2622658afb5cb Mon Sep 17 00:00:00 2001 From: lpinne Date: Mon, 24 Jun 2024 16:01:26 +0200 Subject: [PATCH 49/84] ocf_suse_SAPHanaController.7 ocf_suse_SAPHanaFilesystem.7 susChkSrv.py.7: SAPHanaSR-alert-fencing --- man/ocf_suse_SAPHanaController.7 | 9 ++++++++- man/ocf_suse_SAPHanaFilesystem.7 | 11 +++++++---- man/susChkSrv.py.7 | 18 +++++++++++------- 3 files changed, 26 insertions(+), 12 deletions(-) diff --git a/man/ocf_suse_SAPHanaController.7 b/man/ocf_suse_SAPHanaController.7 index 16acca81..63e34754 100644 --- a/man/ocf_suse_SAPHanaController.7 +++ b/man/ocf_suse_SAPHanaController.7 @@ -146,6 +146,7 @@ Values: [ proceed | fence ]. - proceed: proceed the failure as usual, i.e. initiate demote-stop sequence. .br - fence: trigger stop failure and node fencing, if conditions are matched. 
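
For the DUPLICATE_PRIMARY_TIMEOUT logic that patch 46 above corrects, a worked example with invented last primary time stamp (LPT) values may help; the 7200 default is taken from the man page text:

    # LPT difference is 3000s, below DUPLICATE_PRIMARY_TIMEOUT=7200, so the
    # cluster would hold the instance(s) in WAITING for an admin to react
    lpt_node1=1718900000
    lpt_node2=1718897000
    DUPLICATE_PRIMARY_TIMEOUT=7200
    if (( lpt_node1 - lpt_node2 < DUPLICATE_PRIMARY_TIMEOUT )); then
        echo "WAITING: dual primary, admin reaction window still open"
    fi
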
+.\" TODO: SAPHanaSR-alert-fencing .br Experimental (Optional). Default value: proceed. .RE @@ -518,7 +519,13 @@ Please report any other feedback and suggestions to feedback@suse.com. .PP .\" .SH SEE ALSO -\fBocf_suse_SAPHanaTopology\fP(7) , \fBocf_suse_SAPHanaFilesystem\fP(7) , \fBocf_heartbeat_IPaddr2\fP(8) , \fBSAPHanaSR-monitor\fP(8) , \fBSAPHanaSR-showAttr\fP(8) , \fBSAPHanaSR\fP(7) , \fBSAPHanaSR-ScaleOut\fP(7) , \fBSAPHanaSR_maintenance_examples\fP(7) , \fBSAPHanaSR_basic_cluster\fP(7) , \fBSAPHanaSR-ScaleOut_basic_cluster\fP(7) , \fBSAPHanaSR-manageAttr\fP(8) , \fBchrony.conf\fP(5) , \fBstonith\fP(8) , \fBcrm\fP(8) +\fBocf_suse_SAPHanaTopology\fP(7) , \fBocf_suse_SAPHanaFilesystem\fP(7) , +\fBocf_heartbeat_IPaddr2\fP(8) , \fBSAPHanaSR-monitor\fP(8) , \fBSAPHanaSR-showAttr\fP(8) , +\fBSAPHanaSR\fP(7) , \fBSAPHanaSR-ScaleOut\fP(7) , +\fBSAPHanaSR_maintenance_examples\fP(7) , \fBSAPHanaSR_basic_cluster\fP(7) , +\fBSAPHanaSR-ScaleOut_basic_cluster\fP(7) , \fBSAPHanaSR-manageAttr\fP(8) , +\fBSAPHanaSR-alert-fencing\fP(8) , +\fBchrony.conf\fP(5) , \fBstonith\fP(8) , \fBcrm\fP(8) .br https://documentation.suse.com/sbp/sap/ , .br diff --git a/man/ocf_suse_SAPHanaFilesystem.7 b/man/ocf_suse_SAPHanaFilesystem.7 index 8363bb3d..10decf5a 100644 --- a/man/ocf_suse_SAPHanaFilesystem.7 +++ b/man/ocf_suse_SAPHanaFilesystem.7 @@ -1,6 +1,6 @@ .\" Version: 1.001 .\" -.TH ocf_suse_SAPHanaFilesystem 7 "24 Apr 2024" "" "OCF resource agents" +.TH ocf_suse_SAPHanaFilesystem 7 "24 Jun 2024" "" "OCF resource agents" .\" .SH NAME SAPHanaFilesystem \- Monitors mounted SAP HANA filesystems. @@ -426,8 +426,10 @@ SAPHanaSR-angi(7) and its references. .br 6. SAP HANA host auto-failover is currently not supported. .br -7. If an HANA worker node of a scale-out site got fenced but not the master -nameserver, the time needed for stopping the whole site depends on HANA timeouts. +7. For HANA scale-out, the SAPHanaSR-alert-fencing should be configured. See manual +page SAPHanaSR-alert-fencing(8) for details. +.\" 7. If an HANA worker node of a scale-out site got fenced but not the master +.\" nameserver, the time needed for stopping the whole site depends on HANA timeouts. .PP .\" .SH BUGS @@ -439,7 +441,8 @@ Please report any other feedback and suggestions to feedback@suse.com. .SH SEE ALSO \fBocf_suse_SAPHanaController\fP(7) , \fBocf_suse_SAPHanaTopology\fP(7) , \fBsusHanaSR.py\fP(7) , \fBSAPHanaSR-showAttr\fP(8) , -\fBSAPHanaSR-angi\fP(7) , \fBSAPHanaSR\fP(7) , \fBSAPHanaSR-ScaleOut\fP(7) , +\fBSAPHanaSR-alert-fencing\fP(8) , \fBSAPHanaSR-angi\fP(7) , \fBSAPHanaSR\fP(7) , +\fBSAPHanaSR-ScaleOut\fP(7) , \fBfstab\fP(5) , \fBmount\fP(8) , \fBnfs\fP(5) , .br https://documentation.suse.com/sbp/sap/ , diff --git a/man/susChkSrv.py.7 b/man/susChkSrv.py.7 index beb8bd3e..5726365f 100644 --- a/man/susChkSrv.py.7 +++ b/man/susChkSrv.py.7 @@ -1,6 +1,6 @@ .\" Version: 1.001 .\" -.TH susChkSrv.py 7 "18 Mar 2024" "" "SAPHanaSR" +.TH susChkSrv.py 7 "24 Jun 2024" "" "SAPHanaSR" .\" .SH NAME susChkSrv.py \- Provider for SAP HANA srHook method srServiceStateChanged(). @@ -75,8 +75,9 @@ If this is combined with SAPHanaController RA parameter 'AUTOMATED_REGISTER=true HANA needs to release all OS resources prior to the automated registering. .br - \fBfence\fP: do 'crm node fence <\fIhost\fR>'. This needs a Linux cluster -STONITH method and sudo permission. This action is primarily meant for scale-up.If it happens on a scale-out worker node, the remaining master needs to time -out before the Linux cluster will react. 
+STONITH method and sudo permission. This action is primarily meant for scale-up. +For scale-out, SAPHanaSR-agent-fencing should be configured additionionally, see +manual page SAPHanaSR-agent-fencing(8) for details. .br .\" TODO - suicide: do 'systemctl reboot'. Do NOT use this! .\" .br @@ -427,8 +428,10 @@ susChkSrv.py parameter 'action_on_lost=fence' is set. SAPHanaController parameter 'AUTOMATED_REGISTER=true' is set, it depends on HANA to release all OS resources prior to the registering attempt. .PP -10. If an HANA worker node of a scale-out site got fenced but not the master -nameserver, the time needed for stopping the whole site depends on HANA timeouts. +10. For HANA scale-out, the susChkSrv.py parameter 'action_on_lost=fence' should +be used only, if the SAPHanaSR-alert-fencing is configured. +.\" 10. If an HANA worker node of a scale-out site got fenced but not the master +.\" nameserver, the time needed for stopping the whole site depends on HANA timeouts. .PP 11. If the hook provider should be pre-compiled, the particular Python version that comes with SAP HANA has to be used. @@ -442,8 +445,9 @@ Please report any other feedback and suggestions to feedback@suse.com. .SH SEE ALSO \fBSAPHanaSR\fP(7) , \fBSAPHanaSR-ScaleOut\fP(7) , \fBSAPHanaSR.py\fP(7) , \fBocf_suse_SAPHanaTopology\fP(7) , \fBocf_suse_SAPHanaController\fP(7) , -\fBSAPHanaSR-hookHelper\fP(8) , -\fBSAPHanaSR-manageProvider\fP(8) , \fBcrm\fP(8) , \fBcrm_attribute\fP(8) , +\fBSAPHanaSR-hookHelper\fP(8) , \fBSAPHanaSR-manageProvider\fP(8) , +\fBSAPHanaSR-alert-fencing\fP(8) , +\fBcrm\fP(8) , \fBcrm_attribute\fP(8) , \fBpython3\fP(8) , \fBkillall\fP(1) , .br https://help.sap.com/docs/SAP_HANA_PLATFORM?locale=en-US From 93833a56ca33e945828541a8b405cbc6272403eb Mon Sep 17 00:00:00 2001 From: lpinne Date: Mon, 1 Jul 2024 10:47:59 +0200 Subject: [PATCH 50/84] SAPHanaSR-ScaleOut_basic_cluster.7 SAPHanaSR_basic_cluster.7: IPAddr2 -> IPaddr2, removed example ocf_heartbeat_Filesystem --- man/SAPHanaSR-ScaleOut_basic_cluster.7 | 50 +++----------------------- man/SAPHanaSR_basic_cluster.7 | 8 ++--- 2 files changed, 8 insertions(+), 50 deletions(-) diff --git a/man/SAPHanaSR-ScaleOut_basic_cluster.7 b/man/SAPHanaSR-ScaleOut_basic_cluster.7 index 352f809a..02b44ba1 100644 --- a/man/SAPHanaSR-ScaleOut_basic_cluster.7 +++ b/man/SAPHanaSR-ScaleOut_basic_cluster.7 @@ -254,7 +254,7 @@ with the HANA primary master nameserver. .PP .RS 2 .br -primitive rsc_ip_SLE_HDB00 IPAddr2 \\ +primitive rsc_ip_SLE_HDB00 IPaddr2 \\ .br op monitor interval=10s timeout=20s \\ .br @@ -276,7 +276,7 @@ Note: This example works for two-node HANA scale-out. .PP .RS 2 .br -primitive rsc_ip_ro_SLE_HDB00 IPAddr2 \\ +primitive rsc_ip_ro_SLE_HDB00 IPaddr2 \\ .br op monitor interval=10s timeout=20s \\ .br @@ -303,8 +303,7 @@ for specific public cloud purpose. .\" TODO .PP .RS 2 -.br -primitive rsc_ip_SLE_HDB00 IPAddr2 \\ +primitive rsc_ip_SLE_HDB00 IPaddr2 \\ .br op monitor interval=10s timeout=20s \\ .br @@ -324,43 +323,6 @@ colocation col_ip_with_SLE_HDB00 \\ .RE .PP -\fB* crm NFS check resource configuration\fR - -In case of NFS failure, HANA might stop working but the Linux cluster might not -take action. To solve this, a dummy filesystem resource could be added. If this -filesystem reports monitor failures, the node gets fenced and a takeover is -initiated. The takeover will fail, if the HANA system replication is not in -sanc (srHook shows SFAIL). -.br -Note: See manual page ocf_suse_SAPHanaFilesystem(7) for a better solution. 
-.br -Note: Understand the impact before implementing. -.PP -.RS 2 -primitive rsc_fs_check_SLE_HDB00 Filesystem \\ -.br - params device="/hana/shared/SLE/check/" \\ - directory="/hana/shared/check/" fstype=nfs4 \\ -.br -options="bind,defaults,rw,hard,proto=tcp,intr,noatime,vers=4,lock" \\ -.br - op monitor interval=120 timeout=120 on-fail=fence \\ -.br - op_params OCF_CHECK_LEVEL=20 \\ -.br - op start interval=0 timeout=120 \\ -.br - op stop interval=0 timeout=120 -.br -clone cln_fs_check_SLE_HDB00 rsc_fs_check_SLE_HDB00 \\ -.br - meta clone-node-max=1 interleave=true -.br -location fs_check_not_on_majority_maker \\ - cln_fs_check_SLE_HDB00 -inf: vm-majority -.RE -.PP - \fB* check how resource stickiness affects promotion scoring\fR SAPHanaSR uses an internal scoring table. The promotion scores for HANA @@ -368,7 +330,6 @@ primary and secondary master are in a certain range. The scores used by the Linux cluster should be in the same range. .PP .RS 2 -.br # SAPHanaSR-showAttr | grep master.:master .br # crm_simulate -Ls | grep promotion @@ -382,7 +343,6 @@ After the root cause has been found and fixed, the failure message can be cleaned. .PP .RS 2 -.br # stonith_admin --cleanup --history= .RE .PP @@ -404,7 +364,6 @@ In case systemd-style init is used for the HANA database, the services can be checked. Example SID is HA1, instance number is 10. .PP .RS 2 -.br # systemctl list-unit-files | grep -i sap .br # systemctl status SAPHA1_10.service @@ -461,8 +420,7 @@ Please report any other feedback and suggestions to feedback@suse.com. .\" .SH SEE ALSO \fBocf_suse_SAPHanaTopology\fP(7) , \fBocf_suse_SAPHanaController\fP(7) , -\fBocf_suse_SAPHanaFilesystem\fP(7) , -\fBocf_heartbeat_IPAddr2\fP(7) , \fBocf_heartbeat_Filesystem\fP(7) , +\fBocf_suse_SAPHanaFilesystem\fP(7) , \fBocf_heartbeat_IPaddr2\fP(7) , \fBsbd\fP(8) , \fBstonith_sbd\fP(7) , \fBstonith_admin\fP(8) , \fBcrm_no_quorum_policy\fP(7) , \fBcrm\fP(8) , \fBcrm_simulate\fP(8) , \fBSAPHanaSR-ScaleOut\fP(7) , \fBSAPHanaSR-showAttr\fP(7) , diff --git a/man/SAPHanaSR_basic_cluster.7 b/man/SAPHanaSR_basic_cluster.7 index 85ec5347..5ff08b17 100644 --- a/man/SAPHanaSR_basic_cluster.7 +++ b/man/SAPHanaSR_basic_cluster.7 @@ -225,7 +225,7 @@ with the HANA primary master nameserver. .PP .RS 2 .br -primitive rsc_ip_SLE_HDB00 IPAddr2 \\ +primitive rsc_ip_SLE_HDB00 IPaddr2 \\ .br op monitor interval=10 timeout=20 \\ .br @@ -246,7 +246,7 @@ along with the HANA secondary master nameserver. .PP .RS 2 .br -primitive rsc_ip_ro_SLE_HDB00 IPAddr2 \\ +primitive rsc_ip_ro_SLE_HDB00 IPaddr2 \\ .br op monitor interval=10 timeout=20 \\ .br @@ -278,7 +278,7 @@ HANA resource, you need to reduce that additional resource´s stickiness to 1. .PP .RS 2 .br -primitive rsc_ip_SLE_HDB00 IPAddr2 \\ +primitive rsc_ip_SLE_HDB00 IPaddr2 \\ .br op monitor interval=10s timeout=20s \\ .br @@ -423,7 +423,7 @@ Please report any other feedback and suggestions to feedback@suse.com. 
.\" .SH SEE ALSO \fBocf_suse_SAPHanaTopology\fP(7) , \fBocf_suse_SAPHanaController\fP(7) , -\fBocf_heartbeat_IPAddr2\fP(7) , \fBocf_heartbeat_Filesystem\fP(7) , +\fBocf_suse_SAPHanaFilesystem\fP(7) , \fBocf_heartbeat_IPaddr2\fP(7) , \fBocf_heartbeat_MailTo\fP(7) , \fBsbd\fP(8) , \fBstonith_sbd\fP(7) , \fBstonith_admin\fP(8) , \fBcrm_no_quorum_policy\fP(7) , \fBcrm\fP(8) , \fBcrm_simulate\fP(8) , From 4518bea50fe14b53e373e433658fdaebda336d63 Mon Sep 17 00:00:00 2001 From: lpinne Date: Mon, 1 Jul 2024 10:50:11 +0200 Subject: [PATCH 51/84] ocf_suse_SAPHanaController.7: see also SAPHanaSR-alert-fencing --- man/ocf_suse_SAPHanaController.7 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/man/ocf_suse_SAPHanaController.7 b/man/ocf_suse_SAPHanaController.7 index 63e34754..e1f73007 100644 --- a/man/ocf_suse_SAPHanaController.7 +++ b/man/ocf_suse_SAPHanaController.7 @@ -141,12 +141,12 @@ Defines how the RA escalates monitor failures on an HANA primary node. If srHook=SOK, in case of monitor failure an node fencing could be triggered. For srHook=SFAIL, the restart will be proceeded as usual. This option may speed up takeover on scale-up systems, depending on how long HANA needs for stopping. +For scale-out see also SAPHanaSR-alert-fencing(8). Values: [ proceed | fence ]. .br - proceed: proceed the failure as usual, i.e. initiate demote-stop sequence. .br - fence: trigger stop failure and node fencing, if conditions are matched. -.\" TODO: SAPHanaSR-alert-fencing .br Experimental (Optional). Default value: proceed. .RE From 2f63f3f2c4d1cc95131115061107a8636be44964 Mon Sep 17 00:00:00 2001 From: lpinne Date: Tue, 2 Jul 2024 09:35:11 +0200 Subject: [PATCH 52/84] SAPHanaSR-alert-fencing.8: typo --- man/SAPHanaSR-alert-fencing.8 | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/man/SAPHanaSR-alert-fencing.8 b/man/SAPHanaSR-alert-fencing.8 index d8f19224..a13fc883 100644 --- a/man/SAPHanaSR-alert-fencing.8 +++ b/man/SAPHanaSR-alert-fencing.8 @@ -13,10 +13,10 @@ The Linux cluster provides an interface to take external action when a cluster event occurs (alert). Than the cluster calls an external program (an alert agent) to handle that alert. .PP -When the Linux cluster has performed a node fencing, it can call SAPHanaSR-agent -on each active cluster node. The agent checks whether the local node belongs to -the same HANA site as the fenced node. If so, it asks the cluster to fence the -local node as well. +When the Linux cluster has performed a node fencing, it can call +SAPHanaSR-alert-fencing on each active cluster node. The agent checks whether +the local node belongs to the same HANA site as the fenced node. If so, it asks +the cluster to fence the local node as well. .PP This improves three use cases for HANA scale-out: .br From 11b6630067575a163f21c713ca61cf47f0f547c3 Mon Sep 17 00:00:00 2001 From: lpinne Date: Tue, 2 Jul 2024 10:16:55 +0200 Subject: [PATCH 53/84] ocf_suse_SAPHanaTopology.7: fixed description --- man/ocf_suse_SAPHanaTopology.7 | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/man/ocf_suse_SAPHanaTopology.7 b/man/ocf_suse_SAPHanaTopology.7 index 15da329f..ced3d55d 100644 --- a/man/ocf_suse_SAPHanaTopology.7 +++ b/man/ocf_suse_SAPHanaTopology.7 @@ -1,6 +1,6 @@ .\" Version: 1.001 .\" -.TH ocf_suse_SAPHanaTopology 7 "13 Dec 2023 "" "OCF resource agents" +.TH ocf_suse_SAPHanaTopology 7 "02 Jul 2024" "" "OCF resource agents" .\" .SH NAME SAPHanaTopology \- Helps to manage two SAP HANA databases with system replication. 
@@ -12,10 +12,10 @@ SAPHanaTopology \- Helps to manage two SAP HANA databases with system replicatio .\" .SH DESCRIPTION SAPHanaTopology is a resource agent (RA) that analyzes the SAP HANA topology -and "sends" all findings via the node status attributes to all nodes in the -cluster. These attributes are taken by the SAPHanaController RA to control the -SAP HANA databases. In addition SAPHanaTopology starts and monitors the local -saphostagent. +and "sends" all findings via cluster information base (CIB) attributes to all +nodes in the cluster. These attributes are taken by the SAPHanaController RA to +control the SAP HANA databases. In addition SAPHanaTopology starts and monitors +the local saphostagent. .PP The resource agent uses the following interfaces provided by SAP: .PP @@ -315,7 +315,7 @@ F.Herschel, L.Pinne. .br (c) 2015-2017 SUSE Linux GmbH, Germany. .br -(c) 2018-2023 SUSE LLC +(c) 2018-2024 SUSE LLC .br SAPHanaTopology comes with ABSOLUTELY NO WARRANTY. .br From a884cddfdcef5f953f3bd578062ebb49fbf7d10d Mon Sep 17 00:00:00 2001 From: lpinne Date: Tue, 2 Jul 2024 15:01:15 +0200 Subject: [PATCH 54/84] SAPHanaSR-alert-fencing.8: typos --- man/SAPHanaSR-alert-fencing.8 | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/man/SAPHanaSR-alert-fencing.8 b/man/SAPHanaSR-alert-fencing.8 index a13fc883..4a06024c 100644 --- a/man/SAPHanaSR-alert-fencing.8 +++ b/man/SAPHanaSR-alert-fencing.8 @@ -9,11 +9,11 @@ SAPHanaSR-alert-fencing \- Alert agent for cluster fencing alerts. .SH DESCRIPTION SAPHanaSR-alert-fencing can be used to react on Linux cluster fencing alerts. .PP -The Linux cluster provides an interface to take external action when a cluster +The Linux cluster provides an interface to initiate external action when a cluster event occurs (alert). Than the cluster calls an external program (an alert agent) to handle that alert. .PP -When the Linux cluster has performed a node fencing, it can call +When the Linux cluster has performed an node fencing, it can call SAPHanaSR-alert-fencing on each active cluster node. The agent checks whether the local node belongs to the same HANA site as the fenced node. If so, it asks the cluster to fence the local node as well. @@ -100,7 +100,7 @@ Example node with failed fencing action is node22. .PP \fB*\fR Example for manually fencing an node. .PP -This could be done for testing the SAPHanaSR-alert-fencing agent integration. +This could be done for testing the SAPHanaSR-alert-fencing agent integration. This test should not be done on production systems. See manual page crm(8) for details. Fenced node is node1. @@ -152,7 +152,7 @@ Please report any other feedback and suggestions to feedback@suse.com. 
.SH SEE ALSO \fBSAPHanaSR-angi\fP(7) , \fBSAPHanaSR-ScaleOut\fP(7) , \fBocf_suse_SAPHanaController\fP(7) , \fBocf_suse_SAPHanaFilesystem\fP(7) , -\fBsusChkSrv.py\fP (7) , \fBcrm\fP(8) , \fBsbd\fP(8) , +\fBsusChkSrv.py\fP(7) , \fBcrm\fP(8) , \fBsbd\fP(8) , .br https://clusterlabs.org/pacemaker/doc/2.1/Pacemaker_Administration/singlehtml/#alert-agents .PP From 31dea7f3c69d75bbc73f9c95e5558bc07db3743c Mon Sep 17 00:00:00 2001 From: lpinne Date: Tue, 2 Jul 2024 15:10:16 +0200 Subject: [PATCH 55/84] SAPHanaSR-alert-fencing.8: requirements --- man/SAPHanaSR-alert-fencing.8 | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/man/SAPHanaSR-alert-fencing.8 b/man/SAPHanaSR-alert-fencing.8 index 4a06024c..30b1e072 100644 --- a/man/SAPHanaSR-alert-fencing.8 +++ b/man/SAPHanaSR-alert-fencing.8 @@ -127,7 +127,8 @@ config file for SBD daemon .SH REQUIREMENTS 1. Pacemaker 2.1.2 or newer. .PP -2. SAP HANA scale-out performance-optimized scenario. +2. SAP HANA scale-out performance-optimized scenario. No HANA host auto-failover, +thus no standby nodes. .PP 3. Only one SID is controlled by the Linux cluster. .PP @@ -140,7 +141,10 @@ SBD_START_MODE. In case of automatic restart of just fenced nodes, it might be necessary to adapt SBD_START_DELAY in order to avoid fencing loops. See manual page sbd(8). .PP -7. The alert agent runtime almost completely depends on call-outs to OS and +7. Fencing is executed unconditionally. The alert agent relies on the preceding +fencing decision. Neither site role nor SR state is checked. +.PP +8. The alert agent runtime almost completely depends on call-outs to OS and Linux cluster. .\" .SH BUGS From 9fe9a51d68f82752100d65af580f107607957efd Mon Sep 17 00:00:00 2001 From: lpinne Date: Wed, 3 Jul 2024 09:23:50 +0200 Subject: [PATCH 56/84] susChkSrv.py.7: typos --- man/susChkSrv.py.7 | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/man/susChkSrv.py.7 b/man/susChkSrv.py.7 index 5726365f..b11f98f6 100644 --- a/man/susChkSrv.py.7 +++ b/man/susChkSrv.py.7 @@ -76,7 +76,7 @@ HANA needs to release all OS resources prior to the automated registering. .br - \fBfence\fP: do 'crm node fence <\fIhost\fR>'. This needs a Linux cluster STONITH method and sudo permission. This action is primarily meant for scale-up. -For scale-out, SAPHanaSR-agent-fencing should be configured additionionally, see +For scale-out, SAPHanaSR-agent-fencing should be configured additionally, see manual page SAPHanaSR-agent-fencing(8) for details. .br .\" TODO - suicide: do 'systemctl reboot'. Do NOT use this! @@ -113,7 +113,7 @@ See also SAPHanaSR_basic_cluster(7). .br Optional. Default is 20 seconds. .TP -* The "HA/DR providers" API accepts the following parameter for the trace section in globnal.ini: +* The "HA/DR providers" API accepts the following parameter for the trace section in global.ini: .TP \fB[trace]\fP .TP @@ -430,8 +430,6 @@ to release all OS resources prior to the registering attempt. .PP 10. For HANA scale-out, the susChkSrv.py parameter 'action_on_lost=fence' should be used only, if the SAPHanaSR-alert-fencing is configured. -.\" 10. If an HANA worker node of a scale-out site got fenced but not the master -.\" nameserver, the time needed for stopping the whole site depends on HANA timeouts. .PP 11. If the hook provider should be pre-compiled, the particular Python version that comes with SAP HANA has to be used. 
From 777f7a7f5a2f76835929f3faea49e8e480258295 Mon Sep 17 00:00:00 2001
From: lpinne
Date: Thu, 4 Jul 2024 10:03:35 +0200
Subject: [PATCH 57/84] SAPHanaSR-alert-fencing.8: requirements

---
 man/SAPHanaSR-alert-fencing.8 | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/man/SAPHanaSR-alert-fencing.8 b/man/SAPHanaSR-alert-fencing.8
index 30b1e072..83e0eb9d 100644
--- a/man/SAPHanaSR-alert-fencing.8
+++ b/man/SAPHanaSR-alert-fencing.8
@@ -63,9 +63,9 @@ The following lines need to be added to the cluster's CIB:
 .RS 2
 alert fencing-1 "/usr/bin/SAPHanaSR-alert-fencing" \\
 .br
-select fencing \\
+    select fencing \\
 .br
-attributes alert_fencing_delay=300
+    attributes alert_fencing_delay=300
 .RE
 .PP
 \fB*\fR Example for configuring the alert agent by using crm.
@@ -136,15 +136,17 @@ thus no standby nodes.
 .PP
 5. No other alert agent should be configured for the fencing alert.
 .PP
-6. Automatic restart of just fenced nodes should be disabled by adapting
+6. SAPHanaFilesystem RA with monitor operations is active.
+.PP
+7. Automatic restart of just fenced nodes should be disabled by adapting
 SBD_START_MODE. In case of automatic restart of just fenced nodes, it might be
 necessary to adapt SBD_START_DELAY in order to avoid fencing loops. See manual
 page sbd(8).
 .PP
-7. Fencing is executed unconditionally. The alert agent relies on the preceding
+8. Fencing is executed unconditionally. The alert agent relies on the preceding
 fencing decision. Neither site role nor SR state is checked.
 .PP
-8. The alert agent runtime almost completely depends on call-outs to OS and
+9. The alert agent runtime almost completely depends on call-outs to OS and
 Linux cluster.
 .\"
 .SH BUGS

From a7d904432236599679c3a629a7ee287d4cc809a4 Mon Sep 17 00:00:00 2001
From: lpinne
Date: Thu, 4 Jul 2024 14:18:51 +0200
Subject: [PATCH 58/84] SAPHanaSR-tester.7: added classic tests

---
 man-tester/SAPHanaSR-tester.7 | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/man-tester/SAPHanaSR-tester.7 b/man-tester/SAPHanaSR-tester.7
index 8ad40cce..2d7daa2d 100644
--- a/man-tester/SAPHanaSR-tester.7
+++ b/man-tester/SAPHanaSR-tester.7
@@ -1,6 +1,6 @@
 .\" Version: 1.001
 .\"
-.TH SAPHanaSR-tester 7 "29 Feb 2024" "" "SAPHanaSR-angi"
+.TH SAPHanaSR-tester 7 "04 Jul 2024" "" "SAPHanaSR-angi"
 .\"
 .SH NAME
 SAPHanaSR-tester \- Functional testing for SAPHanaSR clusters.
@@ -248,8 +248,9 @@ Please report any other feedback and suggestions to feedback@suse.com.
 .\"
 .SH SEE ALSO
 \fBSAPHanaSR-testCluster\fP(8) , \fBSAPHanaSR-tests-syntax\fP(5) ,
-\fBSAPHanaSR-tests-description\fP(7) ,
+\fBSAPHanaSR-tests-description\fP(7) , \fBSAPHanaSR-tests-angi-ScaleUp\fP(7) ,
 \fBSAPHanaSR-tests-angi-ScaleOut\fP(7) ,
+\fBSAPHanaSR-tests-classic-ScaleUp\fP(7) , \fBSAPHanaSR-tests-classic-ScaleOut\fP(7) ,
 \fBSAPHanaSR-angi\fP(7) , \fBSAPHanaSR-showAttr\fP(8) , \fBcrm_mon\fP(8) ,
 \fBssh-keygen\fP(1) , \fBssh-copy-id\fP(1) ,
 .br

From 5420b8dae53154079525aecd1dab6a42702361dc Mon Sep 17 00:00:00 2001
From: lpinne
Date: Thu, 4 Jul 2024 14:25:12 +0200
Subject: [PATCH 59/84] SAPHanaSR-tester.7: added classic tests

---
 man-tester/SAPHanaSR-tester.7 | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/man-tester/SAPHanaSR-tester.7 b/man-tester/SAPHanaSR-tester.7
index 2d7daa2d..c933b477 100644
--- a/man-tester/SAPHanaSR-tester.7
+++ b/man-tester/SAPHanaSR-tester.7
@@ -250,7 +250,7 @@ Please report any other feedback and suggestions to feedback@suse.com.
\fBSAPHanaSR-testCluster\fP(8) , \fBSAPHanaSR-tests-syntax\fP(5) ,
 \fBSAPHanaSR-tests-description\fP(7) , \fBSAPHanaSR-tests-angi-ScaleUp\fP(7) ,
 \fBSAPHanaSR-tests-angi-ScaleOut\fP(7) ,
-\fBSAPHanaSR-tests-classic-ScaleUp\fP(7) , \fBSAPHanaSR-tests-classic-ScaleOut\fP(7) ,
+\fBSAPHanaSR-tests-classic-ScaleUp\fP(7) ,
 \fBSAPHanaSR-angi\fP(7) , \fBSAPHanaSR-showAttr\fP(8) , \fBcrm_mon\fP(8) ,
 \fBssh-keygen\fP(1) , \fBssh-copy-id\fP(1) ,
 .br

From 3370ff1ee5346f122818186a8e2492325156e576 Mon Sep 17 00:00:00 2001
From: Fabian Herschel
Date: Thu, 4 Jul 2024 16:22:07 +0200
Subject: [PATCH 60/84] SAPHanaSR-alert-fencing.8: updating parameter name

---
 man/SAPHanaSR-alert-fencing.8 | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/man/SAPHanaSR-alert-fencing.8 b/man/SAPHanaSR-alert-fencing.8
index d8f19224..3313c86c 100644
--- a/man/SAPHanaSR-alert-fencing.8
+++ b/man/SAPHanaSR-alert-fencing.8
@@ -38,8 +38,8 @@ If the alert agent does not complete within this amount of time, it will be term
 .\" \fBenabled\fR
 .\" If false for an alert, the alert will not be used. If true for an alert and false for a particular recipient of that alert, that recipient will not be used. Optional, default "true".
 .TP
-\fBalert_fencing_delay\fR
-How long a node must be up and running (uptime) before fencing alerts will be processed. This avoids fencing loops. Optional, default "300". Example "attributes alert_fencing_delay=300".
+\fBalert_uptime_threshold\fR
+How long a node must be up and running (uptime) before fencing alerts will be processed. This avoids fencing loops. Optional, default "300". Example "attributes alert_uptime_threshold=300".
 .\"
 .PP
 .\"
@@ -65,7 +65,7 @@ alert fencing-1 "/usr/bin/SAPHanaSR-alert-fencing" \\
 .br
 select fencing \\
 .br
-attributes alert_fencing_delay=300
+attributes alert_uptime_threshold=300
 .RE
 .PP
 \fB*\fR Example for configuring the alert agent by using crm.

From b933c65ae2e33892819a1fe8c36682620d488f81 Mon Sep 17 00:00:00 2001
From: Fabian Herschel
Date: Thu, 4 Jul 2024 16:27:54 +0200
Subject: [PATCH 61/84] SAPHanaSR-alert-fencing, SAPHanaSR-alert-fencing.8: updating parameter name

---
 man/SAPHanaSR-alert-fencing.8 | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/man/SAPHanaSR-alert-fencing.8 b/man/SAPHanaSR-alert-fencing.8
index 3b1342c2..a589cbd8 100644
--- a/man/SAPHanaSR-alert-fencing.8
+++ b/man/SAPHanaSR-alert-fencing.8
@@ -65,7 +65,7 @@ alert fencing-1 "/usr/bin/SAPHanaSR-alert-fencing" \\
 .br
 select fencing \\
 .br
-    attributes alert_uptime_threshold=300
+    attributes alert_uptime_threshold=300
 .RE
 .PP
 \fB*\fR Example for configuring the alert agent by using crm.

From 427f43ca73392bac313baad7416c0869ca322b09 Mon Sep 17 00:00:00 2001
From: lpinne
Date: Thu, 4 Jul 2024 16:37:46 +0200
Subject: [PATCH 62/84] SAPHanaSR-alert-fencing.8: example

---
 man/SAPHanaSR-alert-fencing.8 | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/man/SAPHanaSR-alert-fencing.8 b/man/SAPHanaSR-alert-fencing.8
index a589cbd8..6a6f91d8 100644
--- a/man/SAPHanaSR-alert-fencing.8
+++ b/man/SAPHanaSR-alert-fencing.8
@@ -79,7 +79,7 @@ Alternate way for configuring the alert agent.
 \fB*\fR Showing all configured alert agents.
 .PP
 .RS 2
-# crm configure show | grep -A1 alert
+# crm configure show type:alert
 .RE
 .PP
 \fB*\fR Showing agent messages.
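
The CIB snippet shown above can also be applied in one step via the crm shell.
A hedged sketch, reusing the alert ID and threshold value from the example;
verify the exact syntax with crm(8):

  # crm configure alert fencing-1 "/usr/bin/SAPHanaSR-alert-fencing" \
      select fencing \
      attributes alert_uptime_threshold=300
  # crm configure show type:alert
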
From 7c2f5ccbf71c2a060aaf509f8fde5b32bfb6573a Mon Sep 17 00:00:00 2001
From: Fabian Herschel
Date: Thu, 4 Jul 2024 17:16:53 +0200
Subject: [PATCH 63/84] angi: removed old function saphana_init_handle_update(); RAG=3.0

---
 ra/saphana-common-lib     |  2 +-
 ra/saphana-controller-lib | 99 ---------------------------------------
 2 files changed, 1 insertion(+), 100 deletions(-)

diff --git a/ra/saphana-common-lib b/ra/saphana-common-lib
index 2b533de3..78571166 100755
--- a/ra/saphana-common-lib
+++ b/ra/saphana-common-lib
@@ -131,7 +131,7 @@ function core_init() {
     SAPSTARTPROFILE=""
     # Resource Agent Generation
     # shellcheck disable=SC2034
-    RAG="2.0"
+    RAG="3.0"
     SAPHanaFilter='ra-act-dec-lpa'
     super_ocf_log info "RA saphana_common_lib_version=$saphana_common_lib_version"
     set +o posix # disable posix mode of the bash

diff --git a/ra/saphana-controller-lib b/ra/saphana-controller-lib
index 15a5af48..34db1a92 100755
--- a/ra/saphana-controller-lib
+++ b/ra/saphana-controller-lib
@@ -239,105 +239,6 @@ function saphana_methods() {
     return "$rc"
 } # end function saphana_methods

-function saphana_init_handle_update() {
-    # called by: ?? (not found)
-    # TODO PRIO2: check, if we still need this
-    # handle RA update state
-    node_updated=$(get_hana_attribute "${NODENAME}" "${ATTR_NAME_HANA_GRA[@]}")
-    onode_site=$(get_hana_attribute "${NODENAME}" "${ATTR_NAME_HANA_SITE[@]}")
-    if [ -n "$onode_site" ] && [ "$node_updated" != "$RAG" ]; then
-        # only set on HANA nodes, skip the decision maker / tiebreaker
-        # attribute is empty or set by an older RA generation
-        # update attribute to the new RA generation
-        node_updated="$RAG"
-        set_hana_attribute "${NODENAME}" "${node_updated}" "${ATTR_NAME_HANA_GRA[@]}"
-    fi
-    cluster_update_state=$(get_hana_attribute "X" "${ATTR_NAME_HANA_UPD_STATE[@]}")
-    # evaluate and update/set the update state of the cluster only on a master
-    # nameserver to prevent race conditions
-    if is_master_nameserver; then
-        nrOfNodes=1
-        updNodes=1
-        for onode in "${otherNodes[@]}"; do
-            onode_site=$(get_hana_attribute "$onode" "${ATTR_NAME_HANA_SITE[@]}")
-            if [ -z "$onode_site" ]; then
-                # not a HANA node, maybe the decision maker / tiebreaker
-                continue
-            fi
-            (( nrOfNodes++ ))
-            onode_updated=$(get_hana_attribute "$onode" "${ATTR_NAME_HANA_GRA[@]}")
-            if [ -n "$onode_updated" ]; then
-                if [ "$onode_updated" == "$RAG" ]; then
-                    # only nodes with an equal RA generation as the local
-                    # node are considered as 'updated nodes'
-                    (( updNodes++ ))
-                fi
-            fi
-        done
-        if [ "$nrOfNodes" == "$updNodes" ]; then
-            updState="ok"
-        else
-            # oldNodes=$(("$nrOfNodes" - "$updNodes")) # TODO PRIO2: NG - check usage of this variable (oldNodes)
-            updState="nok"
-        fi
-        # if cluster_update_state is empty or the stored value does not match the
-        # current running state 'updState' update the attribute and the variable
-        if [ "$cluster_update_state" != "$updState" ]; then
-            set_hana_attribute "X" "$updState" "${ATTR_NAME_HANA_UPD_STATE[@]}"
-            cluster_update_state="$updState"
-        fi
-    fi
-    super_ocf_log info "UPD: cluster update state is '$cluster_update_state'"
-    # if NOT all cluster nodes are updated to the new RA, we still working
-    # with the old, global Hook attribute
-    if [ "$cluster_update_state" == "ok" ]; then
-        # all cluster nodes are now updated to the new RA, now check, if
-        # we can/need/should migrate the SRHook attribute
-        # check which Hook generation is used on all cluster nodes
-        srHook_gen=$(get_hana_attribute "${NODENAME}" "${ATTR_NAME_HANA_SRHOOK_GEN[@]}")
-        if [ -z "$srHook_gen" ]; then
-            # we are on
the new RA package, but the new srHook code is currently - # not active, no reload or restart of srHook on HANA side - super_ocf_log info "HOOK: RA saphana_init - on the local cluster node '${NODENAME}' the srHook generation attribute is empty. May be the new srHook is currently not active, no reloaded or no restart of srHook on HANA side was done" - fi - for onode in "${otherNodes[@]}"; do - onode_site=$(get_hana_attribute "$onode" "${ATTR_NAME_HANA_SITE[@]}") - if [ -z "$onode_site" ]; then - # not a HANA node, maybe the decision maker / tiebreaker - continue - fi - onode_srHook_gen=$(get_hana_attribute "$onode" "${ATTR_NAME_HANA_SRHOOK_GEN[@]}") - if [ -z "$onode_srHook_gen" ]; then - # we are on the new RA package, but the new srHook code is - # currently not active, no reload or restart of srHook on HANA - # side - super_ocf_log info "HOOK: RA saphana_init - on cluster node '$onode' the srHook generation attribute is empty. May be the new srHook is currently not active, no reloaded or no restart of srHook on HANA side was done" - fi - if [ "$srHook_gen" != "$onode_srHook_gen" ]; then - # the cluster nodes are running different Hook generations - super_ocf_log info "HOOK: RA saphana_init - the cluster nodes '$NODENAME' and '$onode' are running different Hook generations ('$srHook_gen' - '$onode_srHook_gen')." - fi - done - # be in mind: it may be that not all nodes running the same Hook generation - multiTargetSupport=$(get_hana_attribute "X" "${ATTR_NAME_HANA_multiTargetSupport[@]}") - if [ -z "$multiTargetSupport" ]; then - # cluster attribute 'hana_${sid}_glob_mts' not set - super_ocf_log info "RA: multiTargetSupport attribute not set. May be no Hook is configured or the old-style Hook is used." - fi - if ocf_is_true "$multiTargetSupport"; then - super_ocf_log info "RA: multiTargetSupport attribute is set to 'true'" - if [ "$srHook_gen" == "2.0" ]; then - # TODO PRIO1: NG - ATTR_NAME_HANA_GLOB_SRHOOK needs to be ATTR_NAME_HANA_SITE_SRHOOK - old_sync=$(get_hana_attribute "X" "${ATTR_NAME_HANA_GLOB_SRHOOK[@]}") - if [ -n "$old_sync" ]; then - # old, global attribute still exists - super_ocf_log info "RA: The global Hook attribute is still available. Use cmd 'SAPHanaSR-manageAttr' to remove this attribute" - fi - fi - fi - fi -} # end function saphana_init_handle_update - function saphana_init_sap_paths() { # function: saphana_init_sap_paths - set variables used for SAP paths (directories, config files and executables) # globals: TODO OCF_RESKEY_DIR_EXECUTABLE, SID, InstanceName, DIR_EXECUTABLE, SAPSTARTSRV, SAPCONTROL, OCF_RESKEY_DIR_PROFILE, SAPVIRHOST From e67618f9842aa3ba72518600acd69145f241141a Mon Sep 17 00:00:00 2001 From: lpinne Date: Fri, 5 Jul 2024 09:21:15 +0200 Subject: [PATCH 64/84] SAPHanaSR_upgrade_to_angi.7: fixed changing attributes --- man/SAPHanaSR_upgrade_to_angi.7 | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/man/SAPHanaSR_upgrade_to_angi.7 b/man/SAPHanaSR_upgrade_to_angi.7 index 237c73ea..025f6227 100644 --- a/man/SAPHanaSR_upgrade_to_angi.7 +++ b/man/SAPHanaSR_upgrade_to_angi.7 @@ -1,6 +1,6 @@ .\" Version: 1.001 .\" -.TH SAPHanaSR_upgrade_to_angi 7 "02 Jun 2024" "" "SAPHanaSR" +.TH SAPHanaSR_upgrade_to_angi 7 "04 Jul 2024" "" "SAPHanaSR" .\" .SH NAME SAPHanaSR_upgrade_to_angi \- How to upgrade from SAPHanaSR or SAPHanaSR-ScaleOut to SAPHanaSR-angi. 
@@ -77,8 +77,8 @@ hana__site_opMode_ hana__site_srMode_ .br hana__site_srPoll_ -.br -TODO vhost remoteHost +.\" .br +.\" TODO vhost remoteHost .RE .PP \fB*\fR What will be changed for SAP HANA scale-out scenarios? @@ -96,18 +96,19 @@ c. Tools are placed in /usr/bin/ instead of /usr/sbin/. .br d. Node attributes will be removed. .RS 4 -gra +hana__gra .br -gsh -TODO +hana__gsh .RE e. Site and global attributes will be removed from property SAPHanaSR. .RS 4 -mts -upd +hana__glob_mts +.br +hana__glob_upd +.br hana__glob_sync_state +.br hana__glob_srHook (in case of obsolete scale-out SAPHanaSR.py) -TODO .RE f. Site and global attributes will be added to property SAPHanaSR. .RS 4 @@ -124,8 +125,6 @@ hana__site_srr_ hana__site_srMode_ .br hana__site_srPoll_ -.br -TODO .RE .PP \fB*\fR How does the upgrade procedure look like at a glance? From bf8ba336ddd32eed9bbff9f81fcb8ae1819a694b Mon Sep 17 00:00:00 2001 From: Fabian Herschel Date: Fri, 5 Jul 2024 13:08:23 +0200 Subject: [PATCH 65/84] angi: test/json/angi-ScaleOut/kill_prim_node.json - indentation --- test/json/angi-ScaleOut/kill_prim_node.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/json/angi-ScaleOut/kill_prim_node.json b/test/json/angi-ScaleOut/kill_prim_node.json index 3445fa23..08095c99 100644 --- a/test/json/angi-ScaleOut/kill_prim_node.json +++ b/test/json/angi-ScaleOut/kill_prim_node.json @@ -88,7 +88,7 @@ "sSite": "pSiteUp", "pHost": "sHostUp", "sHost": "pHostUp", - "pWorker": "sWorkerUp", + "pWorker": "sWorkerUp", "sWorker": "pWorkerUp" } ] From 007e761e4357f0f89a81f0f087e37f6fc3d61551 Mon Sep 17 00:00:00 2001 From: Fabian Herschel Date: Fri, 5 Jul 2024 13:09:45 +0200 Subject: [PATCH 66/84] angi: ra - improved handling for timeout-return-codes (>=124, <124) --- ra/saphana-controller-common-lib | 8 ++++---- ra/saphana-controller-lib | 2 +- ra/saphana-topology-lib | 10 +++++----- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/ra/saphana-controller-common-lib b/ra/saphana-controller-common-lib index 11055fdd..5482e04e 100755 --- a/ra/saphana-controller-common-lib +++ b/ra/saphana-controller-common-lib @@ -559,15 +559,15 @@ function get_hana_landscape_status() { else super_ocf_log info "RUNTIME do NOT use cached value for lss return code (cache_mode=$cache_mode, g_cache_lss=$g_cache_lss)" hana_LSS_Out=$(HANA_CALL --timeout "$HANA_CALL_TIMEOUT" --cmd "python landscapeHostConfiguration.py --sapcontrol=1" 2>/dev/null); rc=$? - if [[ "$rc" == 124 ]]; then + if [[ "$rc" -ge 124 ]]; then # TODO: PRIO 1: Check, if we should loop here like 'for i in 1 2 3 ...' ? # landscape timeout - super_ocf_log warn "RA: landscapeHostConfiguration.py TIMEOUT after $HANA_CALL_TIMEOUT seconds" + super_ocf_log warn "RA: landscapeHostConfiguration.py TIMEOUT after $HANA_CALL_TIMEOUT seconds (rc=$rc)" sleep 20 # shellcheck disable=SC2034 hana_LSS_Out=$(HANA_CALL --timeout "$HANA_CALL_TIMEOUT" --cmd "python landscapeHostConfiguration.py --sapcontrol=1" 2>/dev/null); rc=$? - if [ "$rc" == 124 ]; then - super_ocf_log warn "RA: landscapeHostConfiguration.py second TIMEOUT after $HANA_CALL_TIMEOUT seconds" + if [ "$rc" -ge 124 ]; then + super_ocf_log warn "RA: landscapeHostConfiguration.py second TIMEOUT after $HANA_CALL_TIMEOUT seconds (rc=$rc)" # TODO PRIO2: How to handle still hanging lss - current solution is to say "FATAL" - Maybe we should return the stored attribute value? 
rc=0 fi diff --git a/ra/saphana-controller-lib b/ra/saphana-controller-lib index 34db1a92..dac3ce5c 100755 --- a/ra/saphana-controller-lib +++ b/ra/saphana-controller-lib @@ -161,7 +161,7 @@ function saphana_print_parameters() { Define timeout how long a call to HANA to receive information can take. Define timeout how long a call to HANA to receive information can take. This could be eg landscapeHostConfiguration.py. There are some specific calls to HANA which have their own timeout values. For example the takeover command does not timeout (inf). - If the timeout is reached, the return code will be 124. If you increase the timeouts for HANA calls you should also adjust the operation timeouts + If the timeout is reached, the return code will be 124 or 137 (for kill -9). If you increase the timeouts for HANA calls you should also adjust the operation timeouts of your cluster resources. diff --git a/ra/saphana-topology-lib b/ra/saphana-topology-lib index 9915c418..60916c7a 100755 --- a/ra/saphana-topology-lib +++ b/ra/saphana-topology-lib @@ -113,7 +113,7 @@ SAPHanaTopology scans the output table of landscapeHostConfiguration.py to ident Define timeout how long a call to HANA to receive information can take. Define timeout how long a call to HANA to receive information can take. This could be eg landscapeHostConfiguration.py. There are some specific calls to HANA which have their own timeout values. For example the takeover command does not timeout (inf). - If the timeout is reached, the return code will be 124. If you increase the timeouts for HANA calls you should also adjust the operation timeouts + If the timeout is reached, the return code will be 124 or 137 (for kill -9). If you increase the timeouts for HANA calls you should also adjust the operation timeouts of your cluster resources. @@ -283,7 +283,7 @@ function sht_start() { start_saphostagent fi gNodeRole="$( get_role_by_landscape "$gVirtName")"; hanalrc="$?" - if [[ "$hanalrc" != 124 ]]; then + if [[ "$hanalrc" -lt 124 ]]; then set_hana_attribute "${NODENAME}" "$gNodeRole" "${ATTR_NAME_HANA_ROLES[@]}" fi # TODO PRIO 1: scale-out used side-effect via RC_hdbnsutil to give back different return codes; scale-up rc was always OCF_SUCCESS @@ -451,7 +451,7 @@ function sht_stop_clone() { fi get_local_virtual_name gNodeRole="$( get_role_by_landscape "$gVirtName" --timeout="$timeout")"; hanalrc="$?" - if [[ "$hanalrc" != "124" ]]; then + if [[ "$hanalrc" -lt "124" ]]; then # normal exit, use gNodeRole tout=0 set_hana_attribute "${NODENAME}" "$gNodeRole" "${ATTR_NAME_HANA_ROLES[@]}" @@ -519,7 +519,7 @@ function sht_monitor_clone() { done g_cache_lss="$hanalrc" super_ocf_log info "DEC: gNodeRole=$gNodeRole gTopology=$gTopology hanalrc=$g_cache_lss" - if [[ "$hanalrc" != "124" ]]; then + if [[ "$hanalrc" -lt "124" ]]; then # normal exit, use gNodeRole super_ocf_log info "DEC: gNodeRole=$gNodeRole" set_hana_attribute "${NODENAME}" "$gNodeRole" "${ATTR_NAME_HANA_ROLES[@]}" @@ -536,7 +536,7 @@ function sht_monitor_clone() { super_ocf_log info "DEC: set_hana_attribute ${NODENAME} $gSite ${ATTR_NAME_HANA_SITE[0]}" fi # TODO PRIO2: NG - COULD/SHOULD WE LIMIT THE SET OF THE LSS/SRR ATTRIBUTE TO ONLY THE_MASTER nodes? 
-    # ignore timeout (124) and "ignore" (5) as return code from the landscapeHostConfiguration call
+    # ignore timeout (124, 137) and "ignore" (5) as return code from the landscapeHostConfiguration call
     case "$hanaPrim" in
         P ) ;;
         S ) # only secondary may propagate its sync status

From 2a6280d3746d5b7a2c9b4cbb3f162f274e937f1a Mon Sep 17 00:00:00 2001
From: Fabian Herschel
Date: Fri, 5 Jul 2024 14:51:16 +0200
Subject: [PATCH 67/84] angi: tester angi-ScaleOut/defaults.json - fixed score attributes

---
 test/json/angi-ScaleOut/defaults.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/json/angi-ScaleOut/defaults.json b/test/json/angi-ScaleOut/defaults.json
index 3491593e..0f9de39c 100644
--- a/test/json/angi-ScaleOut/defaults.json
+++ b/test/json/angi-ScaleOut/defaults.json
@@ -58,7 +58,7 @@
     "pWorkerUp": [
         "clone_state == DEMOTED",
         "roles == slave:slave:worker:slave",
-        "score == -12200"
+        "score == -10000"
     ],
     "sWorkerUp": [
         "clone_state == DEMOTED",

From 93a4f705bf94536ea7b1e56e71abc0cd8f65a985 Mon Sep 17 00:00:00 2001
From: Fabian Herschel
Date: Fri, 5 Jul 2024 14:54:41 +0200
Subject: [PATCH 68/84] angi: tester angi-ScaleOut/kill_prim_node_fencing_alert.json - test added for fencing alert

---
 .../kill_prim_node_fencing_alert.json         | 119 ++++++++++++++++++
 1 file changed, 119 insertions(+)
 create mode 100644 test/json/angi-ScaleOut/kill_prim_node_fencing_alert.json

diff --git a/test/json/angi-ScaleOut/kill_prim_node_fencing_alert.json b/test/json/angi-ScaleOut/kill_prim_node_fencing_alert.json
new file mode 100644
index 00000000..ef3b871f
--- /dev/null
+++ b/test/json/angi-ScaleOut/kill_prim_node_fencing_alert.json
@@ -0,0 +1,119 @@
+{
+    "test": "kill_prim_node fencing alert",
+    "name": "Kill primary master node with fencing alert agent implemented",
+    "start": "prereq10",
+    "steps": [
+        {
+            "step": "prereq10",
+            "name": "test prerequisites",
+            "next": "step20",
+            "loop": 1,
+            "wait": 1,
+            "post": "kill_prim_node",
+            "pSite": "pSiteUp",
+            "sSite": "sSiteUp",
+            "pHost": "pHostUp",
+            "sHost": "sHostUp",
+            "sWorker": "sWorkerUp",
+            "pWorker": "pWorkerUp"
+        },
+        {
+            "step": "step20",
+            "name": "failure detected",
+            "next": "step30",
+            "loop": 120,
+            "wait": 2,
+            "pSite": [
+                "lss == 1",
+                "srr == P",
+                "lpt >~ 1000000000:(20|10)",
+                "srHook ~ (PRIM|SWAIT|SREG)",
+                "srPoll == PRIM"
+            ],
+            "sSite": [
+                "lpt >~ 1000000000:30",
+                "lss == 4",
+                "srr ~ (S|P)",
+                "srHook ~ (PRIM|SOK)",
+                "srPoll ~ (SOK|SFAIL)"
+            ],
+            "pHost": [
+            ],
+            "sHost": [
+                "clone_state ~ (PROMOTED|DEMOTED)",
+                "roles == master1:master:worker:master",
+                "score ~ (100|145)"
+            ]
+        },
+        {
+            "step": "step30",
+            "name": "pmaster fenced",
+            "next": "step40",
+            "loop": 120,
+            "wait": 2,
+            "pHost": [
+                "clone_state is None",
+                "role is None",
+                "score is None"
+            ]
+        },
+        {
+            "step": "step40",
+            "name": "pworker fenced",
+            "next": "step50",
+            "loop": 120,
+            "wait": 2,
+            "pWorker": [
+                "clone_state is None",
+                "role is None",
+                "score is None"
+            ]
+        },
+        {
+            "step": "step50",
+            "name": "begin recover",
+            "next": "final60",
+            "loop": 300,
+            "wait": 2,
+            "todo": "pHost+sHost to check site-name",
+            "pSite": [
+                "lss ~ (1|2)",
+                "srr ~ (P|S)",
+                "lpt >~ 1000000000:(30|20|10)",
+                "srHook ~ (PRIM|SWAIT|SREG)",
+                "srPoll ~ (PRIM|SFAIL)"
+            ],
+            "sSite": [
+                "lpt >~ 1000000000:30",
+                "lss == 4",
+                "srr ~ (S|P)",
+                "srHook == PRIM",
+                "srPoll ~ (SOK|PRIM)"
+            ],
+            "pHost": [
+                "clone_state ~ (UNDEFINED|DEMOTED|WAITING4NODES)",
+                "roles == master1::worker:"
+            ],
+            "sHost": [
+                "clone_state ~ (DEMOTED|PROMOTED)",
+                "roles ==
master1:master:worker:master", + "score ~ (100|145|150)" + ] + }, + { + "step": "final60", + "name": "end recover", + "next": "END", + "loop": 300, + "wait": 2, + "post": "cleanup", + "remark": "pXXX and sXXX are now exchanged", + "pSite": "sSiteUp", + "sSite": "pSiteUp", + "pHost": "sHostUp", + "sHost": "pHostUp", + "pWorker": "sWorkerUp", + "sWorker": "pWorkerUp" + } + ] +} From 05cb35bad8619e4a0d514a5d085b5ae6b67a215f Mon Sep 17 00:00:00 2001 From: lpinne Date: Fri, 5 Jul 2024 15:01:18 +0200 Subject: [PATCH 69/84] SAPHanaSR-tests-angi-ScaleOut.7 SAPHanaSR-tests-description.7: kill_prim_node_fencing_alert --- man-tester/SAPHanaSR-tests-angi-ScaleOut.7 | 9 +++++--- man-tester/SAPHanaSR-tests-description.7 | 24 ++++++++++++++++++++-- 2 files changed, 28 insertions(+), 5 deletions(-) diff --git a/man-tester/SAPHanaSR-tests-angi-ScaleOut.7 b/man-tester/SAPHanaSR-tests-angi-ScaleOut.7 index 0fc4d4b7..c603fb3c 100644 --- a/man-tester/SAPHanaSR-tests-angi-ScaleOut.7 +++ b/man-tester/SAPHanaSR-tests-angi-ScaleOut.7 @@ -47,10 +47,10 @@ Kill primary master indexserver, for susChkSrv.py. Kill primary master instance. .TP \fBkill_prim_node\fP -Kill primary master node. +Kill primary master node (no fencing alert agent configured). .TP -\fBkill_prim_site\fP -Kill secondary site nodes. Not yet implemented. +\fBkill_prim_node_fencing_alert\fP +Kill primary master node (fencing alert agent configured). .TP \fBkill_prim_worker_indexserver\fP Kill primary worker indexserver, for susChkSrv.py. @@ -61,6 +61,9 @@ Kill primary worker instance. \fBkill_prim_worker_node\fP Kill primary worker node. .TP +\fBkill_prim_site\fP +Kill primary site nodes. Not yet implemented. +.TP \fBkill_secn_indexserver\fP Kill secondary master indexserver, for susChkSrv.py. .TP diff --git a/man-tester/SAPHanaSR-tests-description.7 b/man-tester/SAPHanaSR-tests-description.7 index 23ea68ed..8b466f82 100644 --- a/man-tester/SAPHanaSR-tests-description.7 +++ b/man-tester/SAPHanaSR-tests-description.7 @@ -1,6 +1,6 @@ .\" Version: 1.001 .\" -.TH SAPHanaSR-tests-description 7 "04 Jan 2024" "" "SAPHanaSR-angi" +.TH SAPHanaSR-tests-description 7 "04 Jul 2024" "" "SAPHanaSR-angi" .\" .SH NAME SAPHanaSR-tests-description \- Functional tests for SAPHanaSR. @@ -264,7 +264,7 @@ Comment: Application failure, main cluster case. .PP \fBkill_prim_node\fP .RS 2 -Descr: Kill primary node. +Descr: Kill primary node (no fencing alert agent configured). On scale-out, kill primary master node. .br Topology: ScaleUp, ScaleOut. @@ -282,6 +282,26 @@ One takeover. One fencing. Comment: Node failure, main cluster case. .RE .PP +\fBkill_prim_node_fencing_alert\fP +.RS 2 +Descr: Kill primary node (fencing alert agent configured). +On scale-out, kill primary master node. +.br +Topology: ScaleOut. +.br +Prereq: Cluster and HANA are up and running, all good. +.br +Test: systemctl reboot --force +.br +Expect: All primary nodes fenced and finally started as secondary. +HANA primary stopped and finally started as secondary. +HANA secondary becomes finally primary. +SR SFAIL and finally SOK. +One takeover. One fencing for each primary node. +.br +Comment: Node failure, main cluster case. +.RE +.PP \fBkill_prim_worker_indexserver\fP .RS 2 Descr: Kill primary worker indexserver, for susChkSrv.py. 
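
The new test case documented above can be driven like the existing ones. A
hedged invocation sketch following the usual SAPHanaSR-testCluster calling
convention; node names and the properties file are assumptions, see
SAPHanaSR-testCluster(8) for the authoritative options:

  # cd /usr/share/SAPHanaSR-tester/json/angi-ScaleOut
  # SAPHanaSR-testCluster --testFile kill_prim_node_fencing_alert.json \
      --remoteNodes node11 node21 --defaultsFile defaults.json \
      --properties properties.json \
      --logFile kill_prim_node_fencing_alert.log --dumpFailures
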
From 0dec0dbebabb2492070bd7dd9b2c2f57277dd3d9 Mon Sep 17 00:00:00 2001
From: Fabian Herschel
Date: Fri, 5 Jul 2024 15:19:59 +0200
Subject: [PATCH 70/84] angi: kill_prim_node_fencing_alert.json - test added

---
 test/json/angi-ScaleOut/kill_prim_node_fencing_alert.json | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/test/json/angi-ScaleOut/kill_prim_node_fencing_alert.json b/test/json/angi-ScaleOut/kill_prim_node_fencing_alert.json
index ef3b871f..54118663 100644
--- a/test/json/angi-ScaleOut/kill_prim_node_fencing_alert.json
+++ b/test/json/angi-ScaleOut/kill_prim_node_fencing_alert.json
@@ -67,10 +67,10 @@
                 "clone_state is None",
                 "role is None",
                 "score is None"
-            ] 
-        }, 
-        { 
-            "step": "step50", 
+            ]
+        },
+        {
+            "step": "step50",
             "name": "begin recover",
             "next": "final60",
             "loop": 300,

From ba22a4865ca65b6b984e89ac2aa2c338dbd306fd Mon Sep 17 00:00:00 2001
From: Fabian Herschel
Date: Fri, 5 Jul 2024 15:20:38 +0200
Subject: [PATCH 71/84] angi: kill_prim_worker_node_fencing_alert.json - test added

---
 .../kill_prim_worker_node_fencing_alert.json  | 119 ++++++++++++++++++
 1 file changed, 119 insertions(+)
 create mode 100644 test/json/angi-ScaleOut/kill_prim_worker_node_fencing_alert.json

diff --git a/test/json/angi-ScaleOut/kill_prim_worker_node_fencing_alert.json b/test/json/angi-ScaleOut/kill_prim_worker_node_fencing_alert.json
new file mode 100644
index 00000000..4115211f
--- /dev/null
+++ b/test/json/angi-ScaleOut/kill_prim_worker_node_fencing_alert.json
@@ -0,0 +1,119 @@
+{
+    "test": "kill_prim_worker_node fencing alert",
+    "name": "Kill primary worker node with fencing alert agent implemented",
+    "start": "prereq10",
+    "steps": [
+        {
+            "step": "prereq10",
+            "name": "test prerequisites",
+            "next": "step20",
+            "loop": 1,
+            "wait": 1,
+            "post": "kill_prim_worker_node",
+            "pSite": "pSiteUp",
+            "sSite": "sSiteUp",
+            "pHost": "pHostUp",
+            "sHost": "sHostUp",
+            "sWorker": "sWorkerUp",
+            "pWorker": "pWorkerUp"
+        },
+        {
+            "step": "step20",
+            "name": "failure detected",
+            "next": "step30",
+            "loop": 120,
+            "wait": 2,
+            "pSite": [
+                "lss == 1",
+                "srr == P",
+                "lpt >~ 1000000000:(20|10)",
+                "srHook ~ (PRIM|SWAIT|SREG)",
+                "srPoll == PRIM"
+            ],
+            "sSite": [
+                "lpt >~ 1000000000:30",
+                "lss == 4",
+                "srr ~ (S|P)",
+                "srHook ~ (PRIM|SOK)",
+                "srPoll ~ (SOK|SFAIL)"
+            ],
+            "pHost": [
+            ],
+            "sHost": [
+                "clone_state ~ (PROMOTED|DEMOTED)",
+                "roles == master1:master:worker:master",
+                "score ~ (100|145)"
+            ]
+        },
+        {
+            "step": "step30",
+            "name": "pworker fenced",
+            "next": "step40",
+            "loop": 120,
+            "wait": 2,
+            "pWorker": [
+                "clone_state is None",
+                "role is None",
+                "score is None"
+            ]
+        },
+        {
+            "step": "step40",
+            "name": "pmaster fenced",
+            "next": "step50",
+            "loop": 120,
+            "wait": 2,
+            "pHost": [
+                "clone_state is None",
+                "role is None",
+                "score is None"
+            ]
+        },
+        {
+            "step": "step50",
+            "name": "begin recover",
+            "next": "final60",
+            "loop": 300,
+            "wait": 2,
+            "todo": "pHost+sHost to check site-name",
+            "pSite": [
+                "lss ~ (1|2)",
+                "srr ~ (P|S)",
+                "lpt >~ 1000000000:(30|20|10)",
+                "srHook ~ (PRIM|SWAIT|SREG)",
+                "srPoll ~ (PRIM|SFAIL)"
+            ],
+            "sSite": [
+                "lpt >~ 1000000000:30",
+                "lss == 4",
+                "srr ~ (S|P)",
+                "srHook == PRIM",
+                "srPoll ~ (SOK|PRIM)"
+            ],
+            "pHost": [
+                "clone_state ~ (UNDEFINED|DEMOTED|WAITING4NODES)",
+                "roles == master1::worker:"
+            ],
+            "sHost": [
+                "clone_state ~ (DEMOTED|PROMOTED)",
+                "roles == master1:master:worker:master",
+                "score ~ (100|145|150)"
+            ]
+        },
+        {
+            "step": "final60",
+            "name": "end recover",
+            "next": "END",
+            "loop": 300,
+            "wait": 2,
+            "post": "cleanup",
+            "remark": "pXXX and sXXX are now exchanged",
+            "pSite": "sSiteUp",
+            "sSite": "pSiteUp",
+            "pHost": "sHostUp",
+            "sHost": "pHostUp",
+            "pWorker": "sWorkerUp",
+            "sWorker": "pWorkerUp"
+        }
+    ]
+}

From d329ba29824a3a2daa7473d4555f291238c03f1a Mon Sep 17 00:00:00 2001
From: lpinne
Date: Fri, 5 Jul 2024 15:26:47 +0200
Subject: [PATCH 72/84] SAPHanaSR-tests-angi-ScaleOut.7 SAPHanaSR-tests-description.7: kill_prim_worker_node_fencing_alert

---
 man-tester/SAPHanaSR-tests-angi-ScaleOut.7 |  5 ++++-
 man-tester/SAPHanaSR-tests-description.7   | 19 +++++++++++++++++++
 2 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/man-tester/SAPHanaSR-tests-angi-ScaleOut.7 b/man-tester/SAPHanaSR-tests-angi-ScaleOut.7
index c603fb3c..efce76f9 100644
--- a/man-tester/SAPHanaSR-tests-angi-ScaleOut.7
+++ b/man-tester/SAPHanaSR-tests-angi-ScaleOut.7
@@ -59,7 +59,10 @@ Kill primary worker indexserver, for susChkSrv.py.
 Kill primary worker instance.
 .TP
 \fBkill_prim_worker_node\fP
-Kill primary worker node.
+Kill primary worker node (no fencing alert agent configured).
+.TP
+\fBkill_prim_worker_node_fencing_alert\fP
+Kill primary worker node (fencing alert agent configured).
 .TP
 \fBkill_prim_site\fP
 Kill primary site nodes. Not yet implemented.
diff --git a/man-tester/SAPHanaSR-tests-description.7 b/man-tester/SAPHanaSR-tests-description.7
index 8b466f82..ab4e9d28 100644
--- a/man-tester/SAPHanaSR-tests-description.7
+++ b/man-tester/SAPHanaSR-tests-description.7
@@ -357,6 +357,25 @@ One takeover. One fencing.
 Comment: Node failure, main cluster case.
 .RE
 .PP
+\fBkill_prim_worker_node_fencing_alert\fP
+.RS 2
+Descr: Kill primary worker node (fencing alert agent configured).
+.br
+Topology: ScaleOut.
+.br
+Prereq: Cluster and HANA are up and running, all good.
+.br
+Test: systemctl reboot --force
+.br
+Expect: All primary nodes fenced.
+HANA primary stopped and finally started as secondary.
+HANA secondary becomes finally primary.
+SR SFAIL and finally SOK.
+One takeover. One fencing for each primary node.
+.br
+Comment: Node failure, main cluster case.
+.RE
+.PP
 \fBkill_secn_indexserver\fP
 .RS 2
 Descr: Kill secondary indexserver, for susChkSrv.py.

From 0905aafdf2900ba8aad74a82378b448ddb56e77f Mon Sep 17 00:00:00 2001
From: lpinne
Date: Sat, 6 Jul 2024 11:14:06 +0200
Subject: [PATCH 73/84] SAPHanaSR-tests-angi-ScaleUp_on-fail-fence.7: initial checkin

---
 ...PHanaSR-tests-angi-ScaleUp_on-fail-fence.7 | 86 +++++++++++++++++++
 1 file changed, 86 insertions(+)
 create mode 100644 man-tester/SAPHanaSR-tests-angi-ScaleUp_on-fail-fence.7

diff --git a/man-tester/SAPHanaSR-tests-angi-ScaleUp_on-fail-fence.7 b/man-tester/SAPHanaSR-tests-angi-ScaleUp_on-fail-fence.7
new file mode 100644
index 00000000..dadd765f
--- /dev/null
+++ b/man-tester/SAPHanaSR-tests-angi-ScaleUp_on-fail-fence.7
@@ -0,0 +1,86 @@
+.\" Version: 1.001
+.\"
+.TH SAPHanaSR-tests-angi-ScaleUp 7 "06 Jul 2024" "" "SAPHanaSR-angi"
+.\"
+.SH NAME
+SAPHanaSR-tests-angi-ScaleUp_on-fail-fence \- Functional tests for SAPHanaSR Scale-Up with immediate fencing.
+.PP
+.\"
+.SH DESCRIPTION
+.PP
+Functional tests are shipped for scale-up scenarios when configured for immediate
+fencing. See manual page ocf_suse_SAPHanaController(7) and susChkSrv.py(7) for
+the respective configuration details. These tests can be run
+out-of-the-box. The test cases are defined in dedicated files.
+See manual page SAPHanaSR-tests-syntax(5) for syntax details. Details like
+performed steps or expected behaviour of cluster and HANA are explained in
+SAPHanaSR-tests-description(7).
+.PP
+Predefined functional tests specific for scale-up with immediate fencing overview:
+.TP
+\fBkill_prim_indexserver_onfail_fence\fP
+Kill primary indexserver, for susChkSrv.py.
+.TP
+\fBkill_prim_inst_onfail_fence\fP
+Kill primary instance.
+.TP
+\fBkill_secn_indexserver_onfail_fence\fP
+Kill secondary indexserver, for susChkSrv.py.
+.TP
+\fBkill_secn_inst_onfail_fence\fP
+Kill secondary instance.
+.PP
+.\"
+.SH EXAMPLES
+.PP
+* List tests for SAPHanaSR-angi scale-up with immediate fencing scenarios
+.PP
+.RS 2
+# ls /usr/share/SAPHanaSR-tester/json/angi-ScaleUp/*fenc*
+.RE
+.PP
+.\"
+.SH FILES
+.\"
+.TP
+/usr/share/SAPHanaSR-tester/json/angi-ScaleUp/
+functional tests for SAPHanaSR-angi scale-up scenarios.
+.TP
+/usr/bin/sct_test_*
+shell scripts for un-easy tasks on the cluster nodes.
+.PP
+.\"
+.SH REQUIREMENTS
+.\"
+See the REQUIREMENTS section in SAPHanaSR-tester(7) and SAPHanaSR-angi(7).
+See also ocf_suse_SAPHanaController(7) and susChkSrv.py(7) for configuration.
+Of course, HANA database and Linux cluster have certain requirements.
+Please refer to the product documentation.
+.PP
+.\"
+.SH BUGS
+In case of any problem, please use your favourite SAP support process to open
+a request for the component BC-OP-LNX-SUSE.
+Please report any other feedback and suggestions to feedback@suse.com.
+.PP
+.\"
+.SH SEE ALSO
+\fBSAPHanaSR-tester\fP(7) , \fBSAPHanaSR-testCluster\fP(8) ,
+\fBSAPHanaSR-tests-description\fP(7) , \fBSAPHanaSR-tests-syntax\fP(5) ,
+\fBSAPHanaSR-tests-angi-ScaleUp\fP(7) ,
+\fBSAPHanaSR-angi\fP(7) , \fBSAPHanaSR-showAttr\fP(8),
+\fBocf_suse_SAPHanaController\fP(7) , \fBsusChkSrv.py\fP(7)
+.PP
+.\"
+.SH AUTHORS
+F.Herschel, L.Pinne.
+.PP
+.\"
+.SH COPYRIGHT
+(c) 2024 SUSE LLC
+.br
+The package SAPHanaSR-tester comes with ABSOLUTELY NO WARRANTY.
+.br
+For details see the GNU General Public License at
+http://www.gnu.org/licenses/gpl.html
+.\"

From b26e27fda9c1f23a2208622038431d15d28d9e24 Mon Sep 17 00:00:00 2001
From: lpinne
Date: Sat, 6 Jul 2024 11:28:06 +0200
Subject: [PATCH 74/84] SAPHanaSR-tests-angi-ScaleOut_on-fail-fence.7: initial checkin

---
 ...HanaSR-tests-angi-ScaleOut_on-fail-fence.7 | 119 ++++++++++++++++++
 1 file changed, 119 insertions(+)
 create mode 100644 man-tester/SAPHanaSR-tests-angi-ScaleOut_on-fail-fence.7

diff --git a/man-tester/SAPHanaSR-tests-angi-ScaleOut_on-fail-fence.7 b/man-tester/SAPHanaSR-tests-angi-ScaleOut_on-fail-fence.7
new file mode 100644
index 00000000..b38ee3e6
--- /dev/null
+++ b/man-tester/SAPHanaSR-tests-angi-ScaleOut_on-fail-fence.7
@@ -0,0 +1,119 @@
+.\" Version: 1.001
+.\"
+.TH SAPHanaSR-tests-angi-ScaleOut 7 "06 Jul 2024" "" "SAPHanaSR-angi"
+.\"
+.SH NAME
+SAPHanaSR-tests-angi-ScaleOut_on-fail-fence \- Functional tests for SAPHanaSR Scale-Out with immediate fencing.
+.PP
+.\"
+.SH DESCRIPTION
+.PP
+Functional tests are shipped for the scale-out ERP scenario when configured for
+immediate fencing. See manual page ocf_suse_SAPHanaController(7), susChkSrv.py(7)
+and SAPHanaSR-alert-fencing(8) for the respective configuration details. These
+tests can be run out-of-the-box. The test cases are defined in dedicated files.
+See manual page SAPHanaSR-tests-syntax(5) for syntax details. Details like
+performed steps or expected behaviour of cluster and HANA are explained in
+SAPHanaSR-tests-description(7).
+
+Each test can be executed by running the command SAPHanaSR-testCluster with
+appropriate parameters. See manual page SAPHanaSR-testCluster(8).
+.PP
+Predefined functional tests specific for scale-out ERP with immediate fencing overview:
+.TP
+\fBfreeze_prim_master_nfs_fencing_alert\fP
+Freeze HANA NFS on primary master node.
+.TP
+\fBkill_prim_indexserver_on_fail_fence\fP
+Kill primary master indexserver, for susChkSrv.py.
+.TP
+\fBkill_prim_inst\fP
+Kill primary master instance.
+.TP
+\fBkill_prim_node_fencing_alert\fP
+Kill primary master node (fencing alert agent configured).
+.TP
+\fBkill_prim_worker_indexserver_on_fail_fence\fP
+Kill primary worker indexserver, for susChkSrv.py.
+.TP
+\fBkill_prim_worker_inst\fP
+Kill primary worker instance.
+.TP
+\fBkill_prim_worker_node\fP
+Kill primary worker node (no fencing alert agent configured).
+.TP
+\fBkill_prim_worker_node_fencing_alert\fP
+Kill primary worker node (fencing alert agent configured).
+.TP
+\fBkill_secn_indexserver\fP
+Kill secondary master indexserver, for susChkSrv.py.
+.TP
+\fBkill_secn_inst\fP
+Kill secondary master instance.
+.TP
+\fBkill_secn_node\fP
+Kill secondary master node.
+.TP
+\fBkill_secn_worker_inst\fP
+Kill secondary worker instance.
+.TP
+\fBkill_secn_worker_node\fP
+Kill secondary worker node.
+.TP
+\fBkill_secn_site\fP
+Kill secondary site nodes. Not yet implemented.
+.PP
+.\"
+.SH EXAMPLES
+.PP
+* List tests for SAPHanaSR-angi scale-out ERP with immediate fencing scenarios
+.PP
+.RS 2
+# ls /usr/share/SAPHanaSR-tester/json/angi-ScaleOut/*fenc*
+.RE
+.PP
+.\"
+.SH FILES
+.TP
+/usr/share/SAPHanaSR-tester/json/angi-ScaleOut/
+functional tests for SAPHanaSR-angi scale-out ERP scenarios.
+.TP
+/usr/bin/sct_test_*
+shell scripts for un-easy tasks on the cluster nodes.
+.PP
+.\"
+.SH REQUIREMENTS
+.PP
+See the REQUIREMENTS section in SAPHanaSR-tester(7) and SAPHanaSR-angi(7).
+See also ocf_suse_SAPHanaController(7), susChkSrv.py(7) and
+SAPHanaSR-alert-fencing(8) for configuration.
+Of course, HANA database and Linux cluster have certain requirements.
+Please refer to the product documentation.
+.\"
+.SH BUGS
+In case of any problem, please use your favourite SAP support process to open
+a request for the component BC-OP-LNX-SUSE.
+Please report any other feedback and suggestions to feedback@suse.com.
+.PP
+.\"
+.SH SEE ALSO
+\fBSAPHanaSR-tester\fP(7) , \fBSAPHanaSR-testCluster\fP(8) ,
+\fBSAPHanaSR-tests-description\fP(7) , \fBSAPHanaSR-tests-syntax\fP(5) ,
+\fBSAPHanaSR-tests-angi-ScaleOut\fP(7) ,
+\fBSAPHanaSR-angi\fP(7) , \fBSAPHanaSR-showAttr\fP(8) ,
+\fBocf_suse_SAPHanaController\fP(7) , \fBsusChkSrv.py\fP(7) ,
+\fBSAPHanaSR-alert-fencing\fP(8)
+.PP
+.\"
+.SH AUTHORS
+F.Herschel, L.Pinne.
+.PP
+.\"
+.SH COPYRIGHT
+(c) 2024 SUSE LLC
+.br
+The package SAPHanaSR-tester comes with ABSOLUTELY NO WARRANTY.
+.br
+For details see the GNU General Public License at
+http://www.gnu.org/licenses/gpl.html
+.\"

From ea76c8a68572c12503688e25cbfa419b8d6d43f2 Mon Sep 17 00:00:00 2001
From: lpinne
Date: Sat, 6 Jul 2024 11:28:18 +0200
Subject: [PATCH 75/84] SAPHanaSR-tests-angi-ScaleOut_on-fail-fence.7: initial checkin

---
 man-tester/SAPHanaSR-tests-angi-ScaleUp_on-fail-fence.7 | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/man-tester/SAPHanaSR-tests-angi-ScaleUp_on-fail-fence.7 b/man-tester/SAPHanaSR-tests-angi-ScaleUp_on-fail-fence.7
index dadd765f..f6e2eb43 100644
--- a/man-tester/SAPHanaSR-tests-angi-ScaleUp_on-fail-fence.7
+++ b/man-tester/SAPHanaSR-tests-angi-ScaleUp_on-fail-fence.7
@@ -68,7 +68,7 @@ Please report any other feedback and suggestions to feedback@suse.com.
\fBSAPHanaSR-tester\fP(7) , \fBSAPHanaSR-testCluster\fP(8) ,
 \fBSAPHanaSR-tests-description\fP(7) , \fBSAPHanaSR-tests-syntax\fP(5) ,
 \fBSAPHanaSR-tests-angi-ScaleUp\fP(7) ,
-\fBSAPHanaSR-angi\fP(7) , \fBSAPHanaSR-showAttr\fP(8),
+\fBSAPHanaSR-angi\fP(7) , \fBSAPHanaSR-showAttr\fP(8) ,
 \fBocf_suse_SAPHanaController\fP(7) , \fBsusChkSrv.py\fP(7)
 .PP
 .\"

From e7e64fdaaebab82f653cde86036976158283ed9a Mon Sep 17 00:00:00 2001
From: Fabian Herschel
Date: Mon, 8 Jul 2024 08:35:49 +0200
Subject: [PATCH 76/84] angi tester: added kill_prim_indexserver_fencing_alert.json

---
 .../kill_prim_indexserver_fencing_alert.json  | 122 ++++++++++++++++++
 test/json/angi-ScaleOut/kill_prim_node.json   |   4 +-
 .../kill_prim_node_fencing_alert.json         |   4 +-
 3 files changed, 126 insertions(+), 4 deletions(-)
 create mode 100644 test/json/angi-ScaleOut/kill_prim_indexserver_fencing_alert.json

diff --git a/test/json/angi-ScaleOut/kill_prim_indexserver_fencing_alert.json b/test/json/angi-ScaleOut/kill_prim_indexserver_fencing_alert.json
new file mode 100644
index 00000000..8c3fa794
--- /dev/null
+++ b/test/json/angi-ScaleOut/kill_prim_indexserver_fencing_alert.json
@@ -0,0 +1,122 @@
+{
+    "test": "kill_prim_indexserver_fencing_alert",
+    "name": "Kill primary master indexserver with fencing alert agent implemented",
+    "start": "prereq10",
+    "steps": [
+        {
+            "step": "prereq10",
+            "name": "test prerequisites",
+            "next": "step20",
+            "loop": 1,
+            "wait": 1,
+            "post": "kill_prim_indexserver",
+            "pSite": "pSiteUp",
+            "sSite": "sSiteUp",
+            "pHost": "pHostUp",
+            "sHost": "sHostUp",
+            "pWorker": "pWorkerUp",
+            "sWorker": "sWorkerUp"
+        },
+        {
+            "step": "step20",
+            "name": "failure detected",
+            "next": "step30",
+            "loop": 180,
+            "wait": 2,
+            "comment": "sSite: srPoll could get SFAIL on scale-out",
+            "pSite": [
+                "lss ~ (1|2)",
+                "srr == P",
+                "lpt >~ 1000000000:20",
+                "srHook ~ (PRIM|SWAIT|SREG)",
+                "srPoll == PRIM"
+            ],
+            "sSite": [
+                "lpt >~ 1000000000:30",
+                "lss == 4",
+                "srr == S",
+                "srHook ~ (PRIM|SOK)",
+                "srPoll ~ (SOK|SFAIL)"
+            ],
+            "pHost": [
+            ],
+            "sHost": [
+                "clone_state ~ (PROMOTED|DEMOTED)",
+                "roles == master1:master:worker:master",
+                "score ~ (100|145)"
+            ]
+        },
+        {
+            "step": "step30",
+            "name": "pmaster fenced",
+            "next": "step40",
+            "loop": 120,
+            "wait": 2,
+            "pHost": [
+                "clone_state is None",
+                "role is None",
+                "score is None"
+            ]
+        },
+        {
+            "step": "step40",
+            "name": "pworker fenced",
+            "next": "step50",
+            "loop": 120,
+            "wait": 2,
+            "pWorker": [
+                "clone_state is None",
+                "role is None",
+                "score is None"
+            ]
+        },
+        {
+            "step": "step50",
+            "name": "begin recover",
+            "next": "final60",
+            "loop": 120,
+            "wait": 2,
+            "todo": "pHost+sHost to check site-name",
+            "pSite": [
+                "lss == 1",
+                "srr == P",
+                "lpt >~ 1000000000:(30|20|10)",
+                "srHook ~ (PRIM|SWAIT|SREG)",
+                "srPoll == PRIM"
+            ],
+            "sSite": [
+                "lpt >~ 1000000000:30",
+                "lss == 4",
+                "srr ~ (S|P)",
+                "srHook == PRIM",
+                "srPoll ~ (SOK|SFAIL)"
+            ],
+            "pHost": [
+                "clone_state ~ (UNDEFINED|DEMOTED)",
+                "roles == master1::worker:",
+                "score ~ (90|70|5)"
+            ],
+            "sHost": [
+                "clone_state ~ (DEMOTED|PROMOTED)",
+                "roles == master1:master:worker:master",
+                "score ~ (100|145)",
+                "srah == T"
+            ]
+        },
+        {
+            "step": "final60",
+            "name": "end recover",
+            "next": "END",
+            "loop": 360,
+            "wait": 2,
+            "post": "cleanup",
+            "remark": "pXXX and sXXX are now exchanged",
+            "pSite": "sSiteUp",
+            "sSite": "pSiteUp",
+            "pHost": "sHostUp",
+            "sHost": "pHostUp",
+            "pWorker": "sWorkerUp",
+            "sWorker": "pWorkerUp"
+        }
+    ]
+}
diff --git
a/test/json/angi-ScaleOut/kill_prim_node.json b/test/json/angi-ScaleOut/kill_prim_node.json index 08095c99..d0ce73b2 100644 --- a/test/json/angi-ScaleOut/kill_prim_node.json +++ b/test/json/angi-ScaleOut/kill_prim_node.json @@ -14,8 +14,8 @@ "sSite": "sSiteUp", "pHost": "pHostUp", "sHost": "sHostUp", - "sWorker": "sWorkerUp", - "pWorker": "pWorkerUp" + "pWorker": "pWorkerUp", + "sWorker": "sWorkerUp" }, { "step": "step20", diff --git a/test/json/angi-ScaleOut/kill_prim_node_fencing_alert.json b/test/json/angi-ScaleOut/kill_prim_node_fencing_alert.json index 54118663..d106bb76 100644 --- a/test/json/angi-ScaleOut/kill_prim_node_fencing_alert.json +++ b/test/json/angi-ScaleOut/kill_prim_node_fencing_alert.json @@ -14,8 +14,8 @@ "sSite": "sSiteUp", "pHost": "pHostUp", "sHost": "sHostUp", - "sWorker": "sWorkerUp", - "pWorker": "pWorkerUp" + "pWorker": "pWorkerUp", + "sWorker": "sWorkerUp" }, { "step": "step20", From b22ebacc2be0391656f1218548f78eca1a11e951 Mon Sep 17 00:00:00 2001 From: Fabian Herschel Date: Mon, 8 Jul 2024 08:39:56 +0200 Subject: [PATCH 77/84] tester: move helper scripts in bin subdirectory --- SAPHanaSR-tester.spec | 4 ++-- test/{ => bin}/sct_showlog | 0 test/{ => bin}/sct_test_block_sap_hana_sr | 0 test/{ => bin}/sct_test_create_cluster_config | 0 test/{ => bin}/sct_test_delete_cluster_config | 0 test/{ => bin}/sct_test_free_log_area | 0 test/{ => bin}/sct_test_freeze_prim_fs | 0 test/{ => bin}/sct_test_freeze_prim_master_nfs | 0 test/{ => bin}/sct_test_freeze_prim_site_nfs | 0 test/{ => bin}/sct_test_freeze_secn_site_nfs | 0 test/{ => bin}/sct_test_maintenance_cluster_hana_running | 0 test/{ => bin}/sct_test_maintenance_cluster_turn_hana | 0 test/{ => bin}/sct_test_properties | 0 test/{ => bin}/sct_test_restart_cluster | 0 test/{ => bin}/sct_test_restart_cluster_hana_running | 0 test/{ => bin}/sct_test_restart_cluster_turn_hana | 0 test/{ => bin}/sct_test_unblock_sap_hana_sr | 0 test/{ => bin}/sct_test_unfreeze_prim_fs | 0 test/{ => bin}/sct_test_unfreeze_prim_master_nfs | 0 test/{ => bin}/sct_test_unfreeze_prim_site_nfs | 0 20 files changed, 2 insertions(+), 2 deletions(-) rename test/{ => bin}/sct_showlog (100%) rename test/{ => bin}/sct_test_block_sap_hana_sr (100%) rename test/{ => bin}/sct_test_create_cluster_config (100%) rename test/{ => bin}/sct_test_delete_cluster_config (100%) rename test/{ => bin}/sct_test_free_log_area (100%) rename test/{ => bin}/sct_test_freeze_prim_fs (100%) rename test/{ => bin}/sct_test_freeze_prim_master_nfs (100%) rename test/{ => bin}/sct_test_freeze_prim_site_nfs (100%) rename test/{ => bin}/sct_test_freeze_secn_site_nfs (100%) rename test/{ => bin}/sct_test_maintenance_cluster_hana_running (100%) rename test/{ => bin}/sct_test_maintenance_cluster_turn_hana (100%) rename test/{ => bin}/sct_test_properties (100%) rename test/{ => bin}/sct_test_restart_cluster (100%) rename test/{ => bin}/sct_test_restart_cluster_hana_running (100%) rename test/{ => bin}/sct_test_restart_cluster_turn_hana (100%) rename test/{ => bin}/sct_test_unblock_sap_hana_sr (100%) rename test/{ => bin}/sct_test_unfreeze_prim_fs (100%) rename test/{ => bin}/sct_test_unfreeze_prim_master_nfs (100%) rename test/{ => bin}/sct_test_unfreeze_prim_site_nfs (100%) diff --git a/SAPHanaSR-tester.spec b/SAPHanaSR-tester.spec index 12b2e430..eb042cd1 100644 --- a/SAPHanaSR-tester.spec +++ b/SAPHanaSR-tester.spec @@ -20,7 +20,7 @@ License: GPL-2.0 Group: Productivity/Clustering/HA AutoReqProv: on Summary: Test suite for SAPHanaSR clusters -Version: 1.2.13 +Version: 1.2.14 
Release: 0 Url: https://www.suse.com/c/fail-safe-operation-of-sap-hana-suse-extends-its-high-availability-solution/ @@ -80,7 +80,7 @@ install -m 0644 test/saphana_sr_test.py %{buildroot}/usr/lib/%{name} install -m 0755 test/cs_* %{buildroot}/usr/bin install -m 0755 test/callTest* %{buildroot}/usr/bin install -m 0755 test/loopTests* %{buildroot}/usr/bin -install -m 0755 test/sct_* %{buildroot}/usr/bin +install -m 0755 test/bin/sct_* %{buildroot}/usr/bin # client files install -m 0755 tools/SAPHanaSR-showAttr %{buildroot}/usr/bin diff --git a/test/sct_showlog b/test/bin/sct_showlog similarity index 100% rename from test/sct_showlog rename to test/bin/sct_showlog diff --git a/test/sct_test_block_sap_hana_sr b/test/bin/sct_test_block_sap_hana_sr similarity index 100% rename from test/sct_test_block_sap_hana_sr rename to test/bin/sct_test_block_sap_hana_sr diff --git a/test/sct_test_create_cluster_config b/test/bin/sct_test_create_cluster_config similarity index 100% rename from test/sct_test_create_cluster_config rename to test/bin/sct_test_create_cluster_config diff --git a/test/sct_test_delete_cluster_config b/test/bin/sct_test_delete_cluster_config similarity index 100% rename from test/sct_test_delete_cluster_config rename to test/bin/sct_test_delete_cluster_config diff --git a/test/sct_test_free_log_area b/test/bin/sct_test_free_log_area similarity index 100% rename from test/sct_test_free_log_area rename to test/bin/sct_test_free_log_area diff --git a/test/sct_test_freeze_prim_fs b/test/bin/sct_test_freeze_prim_fs similarity index 100% rename from test/sct_test_freeze_prim_fs rename to test/bin/sct_test_freeze_prim_fs diff --git a/test/sct_test_freeze_prim_master_nfs b/test/bin/sct_test_freeze_prim_master_nfs similarity index 100% rename from test/sct_test_freeze_prim_master_nfs rename to test/bin/sct_test_freeze_prim_master_nfs diff --git a/test/sct_test_freeze_prim_site_nfs b/test/bin/sct_test_freeze_prim_site_nfs similarity index 100% rename from test/sct_test_freeze_prim_site_nfs rename to test/bin/sct_test_freeze_prim_site_nfs diff --git a/test/sct_test_freeze_secn_site_nfs b/test/bin/sct_test_freeze_secn_site_nfs similarity index 100% rename from test/sct_test_freeze_secn_site_nfs rename to test/bin/sct_test_freeze_secn_site_nfs diff --git a/test/sct_test_maintenance_cluster_hana_running b/test/bin/sct_test_maintenance_cluster_hana_running similarity index 100% rename from test/sct_test_maintenance_cluster_hana_running rename to test/bin/sct_test_maintenance_cluster_hana_running diff --git a/test/sct_test_maintenance_cluster_turn_hana b/test/bin/sct_test_maintenance_cluster_turn_hana similarity index 100% rename from test/sct_test_maintenance_cluster_turn_hana rename to test/bin/sct_test_maintenance_cluster_turn_hana diff --git a/test/sct_test_properties b/test/bin/sct_test_properties similarity index 100% rename from test/sct_test_properties rename to test/bin/sct_test_properties diff --git a/test/sct_test_restart_cluster b/test/bin/sct_test_restart_cluster similarity index 100% rename from test/sct_test_restart_cluster rename to test/bin/sct_test_restart_cluster diff --git a/test/sct_test_restart_cluster_hana_running b/test/bin/sct_test_restart_cluster_hana_running similarity index 100% rename from test/sct_test_restart_cluster_hana_running rename to test/bin/sct_test_restart_cluster_hana_running diff --git a/test/sct_test_restart_cluster_turn_hana b/test/bin/sct_test_restart_cluster_turn_hana similarity index 100% rename from test/sct_test_restart_cluster_turn_hana rename to 
test/bin/sct_test_restart_cluster_turn_hana diff --git a/test/sct_test_unblock_sap_hana_sr b/test/bin/sct_test_unblock_sap_hana_sr similarity index 100% rename from test/sct_test_unblock_sap_hana_sr rename to test/bin/sct_test_unblock_sap_hana_sr diff --git a/test/sct_test_unfreeze_prim_fs b/test/bin/sct_test_unfreeze_prim_fs similarity index 100% rename from test/sct_test_unfreeze_prim_fs rename to test/bin/sct_test_unfreeze_prim_fs diff --git a/test/sct_test_unfreeze_prim_master_nfs b/test/bin/sct_test_unfreeze_prim_master_nfs similarity index 100% rename from test/sct_test_unfreeze_prim_master_nfs rename to test/bin/sct_test_unfreeze_prim_master_nfs diff --git a/test/sct_test_unfreeze_prim_site_nfs b/test/bin/sct_test_unfreeze_prim_site_nfs similarity index 100% rename from test/sct_test_unfreeze_prim_site_nfs rename to test/bin/sct_test_unfreeze_prim_site_nfs From 2b1cb8e2c3a26313f4bf768bf6c60d85af9d129a Mon Sep 17 00:00:00 2001 From: Fabian Herschel Date: Mon, 8 Jul 2024 10:32:57 +0200 Subject: [PATCH 78/84] tester: loop scripts moved to bin --- test/{ => bin}/loopTests01 | 0 test/{ => bin}/loopTests02 | 0 test/{ => bin}/loopTests03 | 0 test/{ => bin}/loopTests04 | 0 test/{ => bin}/loopTests04.json | 0 test/querySteps.py | 70 --------------------------------- 6 files changed, 70 deletions(-) rename test/{ => bin}/loopTests01 (100%) rename test/{ => bin}/loopTests02 (100%) rename test/{ => bin}/loopTests03 (100%) rename test/{ => bin}/loopTests04 (100%) rename test/{ => bin}/loopTests04.json (100%) delete mode 100644 test/querySteps.py diff --git a/test/loopTests01 b/test/bin/loopTests01 similarity index 100% rename from test/loopTests01 rename to test/bin/loopTests01 diff --git a/test/loopTests02 b/test/bin/loopTests02 similarity index 100% rename from test/loopTests02 rename to test/bin/loopTests02 diff --git a/test/loopTests03 b/test/bin/loopTests03 similarity index 100% rename from test/loopTests03 rename to test/bin/loopTests03 diff --git a/test/loopTests04 b/test/bin/loopTests04 similarity index 100% rename from test/loopTests04 rename to test/bin/loopTests04 diff --git a/test/loopTests04.json b/test/bin/loopTests04.json similarity index 100% rename from test/loopTests04.json rename to test/bin/loopTests04.json diff --git a/test/querySteps.py b/test/querySteps.py deleted file mode 100644 index 723cc914..00000000 --- a/test/querySteps.py +++ /dev/null @@ -1,70 +0,0 @@ -import sys, json; - -data=json.load(sys.stdin); - -def slashIt(area, object, key, val): - print('{}/{}/{}="{}"'.format(area, object, key, val)) - return 0 - -def loopCondition(conditions, area, object, key): - for cond in conditions: - slashIt(area, object, key, cond) - return 0; - -testID= data['test']; -testName= data['name']; -testSID="SUS" -testMstResource="mst_SAPHanaCon_{}_HDB00".format(testSID) -try: - testSID=data['sid']; - testMstResource=data['mstResource']; -except: - print("info: test {} is missing either sid or mstResource definition".format(testID)) - -steps=data['steps']; -start=data['start']; - -slashIt("Tests",testID,"name",testName) -slashIt("Tests",testID,"start",start) -slashIt("Tests",testID,"sid",testSID) -slashIt("Tests",testID,"mst",testMstResource) - -for step in data['steps']: - try: - stepID=step['step'] - stepName=step['name'] - stepNext=step['next'] - stepLoop=step['loop'] - stepIntv=step['wait'] - except: - print("step {} missing mandatory definitions (step, name, next, loop, wait)".format(stepID)) - try: - stepPost=step['post'] - except: - stepPost="" - 
slashIt("Steps",testID+"-"+stepID, "name", stepName) - slashIt("Steps",testID+"-"+stepID, "next", stepNext) - slashIt("Steps",testID+"-"+stepID, "loop", stepLoop) - slashIt("Steps",testID+"-"+stepID, "wait", stepIntv) - slashIt("Steps",testID+"-"+stepID, "post", stepPost) - try: - pSite=step['pSite'] - loopCondition(pSite,"Steps",testID+"-"+stepID,"pSite") - except: - print("test {} is missing pSite definition".format(stepID)) - try: - sSite=step['sSite'] - loopCondition(sSite,"Steps",testID+"-"+stepID,"sSite") - except: - print("test {} is missing sSite definition".format(stepID)) - try: - pHost=step['pHost'] - loopCondition(pHost,"Steps",testID+"-"+stepID,"pHost") - except: - print("test {} is missing pHost definition".format(stepID)) - try: - sHost=step['sHost'] - loopCondition(sHost,"Steps",testID+"-"+stepID,"sHost") - except: - print("test {} is missing sHost definition".format(stepID)) - From 29efff98d2bbc2047473a86477a559edfaa19a65 Mon Sep 17 00:00:00 2001 From: Fabian Herschel Date: Mon, 8 Jul 2024 10:35:01 +0200 Subject: [PATCH 79/84] tester: added runtest example call to bin --- test/bin/runtest | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100644 test/bin/runtest diff --git a/test/bin/runtest b/test/bin/runtest new file mode 100644 index 00000000..6613c033 --- /dev/null +++ b/test/bin/runtest @@ -0,0 +1,39 @@ +#!/bin/bash +# runtests +# 2024-07-04 + +PROPERTIES="./properties_angi-ScaleOut_hoeferspitze.json" +REMOTENODES="hoeferspitze11 hoeferspitze21" +ASTROOT=/usr/share/SAPHanaSR-tester/json/angi-ScaleOut +LOGFILE="runtests.log" + +exec 3>&1 +exec 1>>"$LOGFILE" + +SECNCAS="kill_secn_inst kill_secn_indexserver kill_secn_node standby_secn_node standby_secn_worker_node free_log_area" +PRIMCAS="kill_prim_inst kill_prim_worker_inst kill_prim_indexserver kill_prim_node kill_prim_worker_node freeze_prim_master_nfs standby_prim_node" +BOTHCAS="restart_cluster restart_cluster_hana_running restart_cluster_turn_hana maintenance_cluster_turn_hana maintenance_with_standby_nodes nop" + +#TCASES="flup kill_prim_node_fencing_alert kill_prim_worker_node_fencing_alert free_log_area" +TCASES="flup kill_prim_node free_log_area kill_prim_worker_node free_log_area" +#TCASES="flup free_log_area $SECNCAS $PRIMCAS free_log_area $BOTHCAS free_log_area" + +for TEST in $TCASES; do + echo "#### $(date +%Y-%m-%d" "%H:%M:%S) run test: $TEST ####" + for N in $REMOTENODES; do + hana_fs="/hana" + hana_fill=$(ssh $N "df --output=pcent $hana_fs | grep -v Use") + echo "$(date +%Y-%m-%d\ %H:%M:%S) fill grade: $N $hana_fs $hana_fill" + done + SAPHanaSR-testCluster --testFile "$ASTROOT/$TEST.json" \ + --remoteNodes $REMOTENODES --defaultsFile "$ASTROOT/defaults.json" \ + --logFile "$TEST.log" --properties "$PROPERTIES" --dumpFailures >&3; rc=$? 
+ echo "#### $(date +%Y-%m-%d" "%H:%M:%S) end test: $TEST rc: $rc ####" + if [[ "$rc" != "0" ]]; then + exit 1 + else + sleep 10 + fi +done +exit 0 +# From 6d7f68d3c83906b38d885ab99b318789264f96cd Mon Sep 17 00:00:00 2001 From: Fabian Herschel Date: Mon, 8 Jul 2024 10:35:45 +0200 Subject: [PATCH 80/84] tester: moved filter and fix* --- test/{ => bin}/filter_sap_trc_by_time | 0 test/{ => misc}/fix_indent | 0 test/{ => misc}/fix_indent.txt | 0 test/misc/querySteps.py | 70 +++++++++++++++++++++++++++ 4 files changed, 70 insertions(+) rename test/{ => bin}/filter_sap_trc_by_time (100%) rename test/{ => misc}/fix_indent (100%) rename test/{ => misc}/fix_indent.txt (100%) create mode 100644 test/misc/querySteps.py diff --git a/test/filter_sap_trc_by_time b/test/bin/filter_sap_trc_by_time similarity index 100% rename from test/filter_sap_trc_by_time rename to test/bin/filter_sap_trc_by_time diff --git a/test/fix_indent b/test/misc/fix_indent similarity index 100% rename from test/fix_indent rename to test/misc/fix_indent diff --git a/test/fix_indent.txt b/test/misc/fix_indent.txt similarity index 100% rename from test/fix_indent.txt rename to test/misc/fix_indent.txt diff --git a/test/misc/querySteps.py b/test/misc/querySteps.py new file mode 100644 index 00000000..723cc914 --- /dev/null +++ b/test/misc/querySteps.py @@ -0,0 +1,70 @@ +import sys, json; + +data=json.load(sys.stdin); + +def slashIt(area, object, key, val): + print('{}/{}/{}="{}"'.format(area, object, key, val)) + return 0 + +def loopCondition(conditions, area, object, key): + for cond in conditions: + slashIt(area, object, key, cond) + return 0; + +testID= data['test']; +testName= data['name']; +testSID="SUS" +testMstResource="mst_SAPHanaCon_{}_HDB00".format(testSID) +try: + testSID=data['sid']; + testMstResource=data['mstResource']; +except: + print("info: test {} is missing either sid or mstResource definition".format(testID)) + +steps=data['steps']; +start=data['start']; + +slashIt("Tests",testID,"name",testName) +slashIt("Tests",testID,"start",start) +slashIt("Tests",testID,"sid",testSID) +slashIt("Tests",testID,"mst",testMstResource) + +for step in data['steps']: + try: + stepID=step['step'] + stepName=step['name'] + stepNext=step['next'] + stepLoop=step['loop'] + stepIntv=step['wait'] + except: + print("step {} missing mandatory definitions (step, name, next, loop, wait)".format(stepID)) + try: + stepPost=step['post'] + except: + stepPost="" + slashIt("Steps",testID+"-"+stepID, "name", stepName) + slashIt("Steps",testID+"-"+stepID, "next", stepNext) + slashIt("Steps",testID+"-"+stepID, "loop", stepLoop) + slashIt("Steps",testID+"-"+stepID, "wait", stepIntv) + slashIt("Steps",testID+"-"+stepID, "post", stepPost) + try: + pSite=step['pSite'] + loopCondition(pSite,"Steps",testID+"-"+stepID,"pSite") + except: + print("test {} is missing pSite definition".format(stepID)) + try: + sSite=step['sSite'] + loopCondition(sSite,"Steps",testID+"-"+stepID,"sSite") + except: + print("test {} is missing sSite definition".format(stepID)) + try: + pHost=step['pHost'] + loopCondition(pHost,"Steps",testID+"-"+stepID,"pHost") + except: + print("test {} is missing pHost definition".format(stepID)) + try: + sHost=step['sHost'] + loopCondition(sHost,"Steps",testID+"-"+stepID,"sHost") + except: + print("test {} is missing sHost definition".format(stepID)) + From 0e94fcabd7338a5a90cd13e622b1027687d6375a Mon Sep 17 00:00:00 2001 From: Fabian Herschel Date: Mon, 8 Jul 2024 13:19:26 +0200 Subject: [PATCH 81/84] tester: clean-up base directory --- 
test/call_any | 12 ------------ test/{ => misc}/README.saphanasrlib.txt | 0 test/{ => misc}/README.txt | 0 3 files changed, 12 deletions(-) delete mode 100644 test/call_any rename test/{ => misc}/README.saphanasrlib.txt (100%) rename test/{ => misc}/README.txt (100%) diff --git a/test/call_any b/test/call_any deleted file mode 100644 index cfafebdb..00000000 --- a/test/call_any +++ /dev/null @@ -1,12 +0,0 @@ -export TROOT=$PWD/json/angi-ScaleUp/ -TEST="$1"; shift -# remotes="localhorst1 localhorst2 192.168.178.1 localhost" -remotes="localhost" -#remotes="127.0.0.1" -./SAPHanaSR-testCluster \ - --testFile "$TROOT/$TEST".json \ - --remoteNodes $remotes \ - --defaultsFile "$TROOT"/defaults.json \ - --properties ./properties.json \ - --logFile test.log \ - $@ diff --git a/test/README.saphanasrlib.txt b/test/misc/README.saphanasrlib.txt similarity index 100% rename from test/README.saphanasrlib.txt rename to test/misc/README.saphanasrlib.txt diff --git a/test/README.txt b/test/misc/README.txt similarity index 100% rename from test/README.txt rename to test/misc/README.txt From 31e72a677c2601d74d37c141021dec677d2095f5 Mon Sep 17 00:00:00 2001 From: Fabian Herschel Date: Mon, 8 Jul 2024 13:22:15 +0200 Subject: [PATCH 82/84] tester: clean-up base directory 2 --- test/{ => bin}/callTest-multiNode | 0 test/{ => bin}/cs_ssh | 0 test/{ => bin}/disp_sql_counter | 0 3 files changed, 0 insertions(+), 0 deletions(-) rename test/{ => bin}/callTest-multiNode (100%) rename test/{ => bin}/cs_ssh (100%) rename test/{ => bin}/disp_sql_counter (100%) diff --git a/test/callTest-multiNode b/test/bin/callTest-multiNode similarity index 100% rename from test/callTest-multiNode rename to test/bin/callTest-multiNode diff --git a/test/cs_ssh b/test/bin/cs_ssh similarity index 100% rename from test/cs_ssh rename to test/bin/cs_ssh diff --git a/test/disp_sql_counter b/test/bin/disp_sql_counter similarity index 100% rename from test/disp_sql_counter rename to test/bin/disp_sql_counter From d4aeaad93bd63d65afb26e9fa674eb616b084116 Mon Sep 17 00:00:00 2001 From: Fabian Herschel Date: Mon, 8 Jul 2024 13:24:55 +0200 Subject: [PATCH 83/84] tester: clean-up base directory 3 --- test/properties.json | 1 - 1 file changed, 1 deletion(-) delete mode 120000 test/properties.json diff --git a/test/properties.json b/test/properties.json deleted file mode 120000 index 4184f996..00000000 --- a/test/properties.json +++ /dev/null @@ -1 +0,0 @@ -json/angi-ScaleUp/properties.json \ No newline at end of file From 883bf90f456da5a640092218bfb153fb674ca6d0 Mon Sep 17 00:00:00 2001 From: Fabian Herschel Date: Mon, 8 Jul 2024 13:30:41 +0200 Subject: [PATCH 84/84] tester: clean-up base directory 4 --- test/bin/filter_sap_trc_by_time | 0 test/bin/runtest | 0 test/{ => tester}/SAPHanaSR-checkJson | 0 test/{ => tester}/SAPHanaSR-testCluster | 0 test/{ => tester}/SAPHanaSR-testCluster-html | 0 test/{ => tester}/saphana_sr_test.py | 0 6 files changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 test/bin/filter_sap_trc_by_time mode change 100644 => 100755 test/bin/runtest rename test/{ => tester}/SAPHanaSR-checkJson (100%) rename test/{ => tester}/SAPHanaSR-testCluster (100%) rename test/{ => tester}/SAPHanaSR-testCluster-html (100%) rename test/{ => tester}/saphana_sr_test.py (100%) diff --git a/test/bin/filter_sap_trc_by_time b/test/bin/filter_sap_trc_by_time old mode 100644 new mode 100755 diff --git a/test/bin/runtest b/test/bin/runtest old mode 100644 new mode 100755 diff --git a/test/SAPHanaSR-checkJson 
b/test/tester/SAPHanaSR-checkJson similarity index 100% rename from test/SAPHanaSR-checkJson rename to test/tester/SAPHanaSR-checkJson diff --git a/test/SAPHanaSR-testCluster b/test/tester/SAPHanaSR-testCluster similarity index 100% rename from test/SAPHanaSR-testCluster rename to test/tester/SAPHanaSR-testCluster diff --git a/test/SAPHanaSR-testCluster-html b/test/tester/SAPHanaSR-testCluster-html similarity index 100% rename from test/SAPHanaSR-testCluster-html rename to test/tester/SAPHanaSR-testCluster-html diff --git a/test/saphana_sr_test.py b/test/tester/saphana_sr_test.py similarity index 100% rename from test/saphana_sr_test.py rename to test/tester/saphana_sr_test.py
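
The helper relocated in PATCH 80, test/misc/querySteps.py, reads one tester
test definition as JSON on stdin and flattens it into Area/Object/Key="value"
property lines, which is handy for browsing test and step definitions after
this directory clean-up. A minimal sketch of a call; the demo file and the
pSite/sSite condition strings are illustrative assumptions, not taken from
the shipped JSON test cases:

# hypothetical demo input; the keys match the fields the script looks up
cat >/tmp/demo.json <<'EOF'
{
  "test": "nop",
  "name": "no operation",
  "start": "step10",
  "sid": "SUS",
  "mstResource": "mst_SAPHanaCon_SUS_HDB00",
  "steps": [
    { "step": "step10", "name": "final state", "next": "END",
      "loop": 1, "wait": 1,
      "pSite": ["srr == P"], "sSite": ["srr == S"],
      "pHost": [], "sHost": [] }
  ]
}
EOF
python3 test/misc/querySteps.py </tmp/demo.json
# prints, in this order:
# Tests/nop/name="no operation"
# Tests/nop/start="step10"
# Tests/nop/sid="SUS"
# Tests/nop/mst="mst_SAPHanaCon_SUS_HDB00"
# Steps/nop-step10/name="final state"
# Steps/nop-step10/next="END"
# Steps/nop-step10/loop="1"
# Steps/nop-step10/wait="1"
# Steps/nop-step10/post=""
# Steps/nop-step10/pSite="srr == P"
# Steps/nop-step10/sSite="srr == S"

The bare except branches in the script act as fallbacks: a missing sid or
mstResource yields only an info line plus the "SUS" defaults, and a missing
post key becomes an empty post action, so incomplete definitions still
produce a complete property listing.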