From 050503bdab07585b06c529f0fb55da0c9e1f7d9b Mon Sep 17 00:00:00 2001
From: lpinne
Date: Wed, 6 Nov 2024 08:48:55 +0100
Subject: [PATCH 1/2] =?UTF-8?q?aligned=20all=20man=20pages=20with=20today?=
 =?UTF-8?q?=C2=B4s=20fmherschel/SAPHanaSR/angi-devel?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 man/SAPHanaController-scale-out.7      |   1 +
 man/SAPHanaController-scale-up.7       |   1 +
 man/SAPHanaFilesystem.7                |   1 +
 man/SAPHanaSR-ScaleOut.7               |  18 ++-
 man/SAPHanaSR-ScaleOut_basic_cluster.7 | 138 ++++++++++--------
 man/SAPHanaSR-alert-fencing.8          |  49 +++++--
 man/SAPHanaSR-angi-scenarios.7         | 188 +++++++++++++++++++++++++
 man/SAPHanaSR-angi.7                   |   9 +-
 man/SAPHanaSR-showAttr.8               |   4 +-
 man/SAPHanaSR-upgrade-to-angi-demo.8   |   2 +-
 man/SAPHanaSR.7                        |  12 +-
 man/SAPHanaSR_basic_cluster.7          | 113 +++++++--------
 man/SAPHanaSR_maintenance_examples.7   |  61 ++++----
 man/SAPHanaSR_upgrade_to_angi.7        |   4 +-
 man/SAPHanaTopology.7                  |   1 +
 man/ocf_suse_SAPHana.7                 |   4 +-
 man/ocf_suse_SAPHanaController.7       |  44 +++---
 man/susHanaSR.py.7                     |  41 +++++-
 man/susHanaSrMultiTarget.py.7          |  11 +-
 19 files changed, 497 insertions(+), 205 deletions(-)
 create mode 100644 man/SAPHanaController-scale-out.7
 create mode 100644 man/SAPHanaController-scale-up.7
 create mode 100644 man/SAPHanaFilesystem.7
 create mode 100644 man/SAPHanaSR-angi-scenarios.7
 create mode 100644 man/SAPHanaTopology.7

diff --git a/man/SAPHanaController-scale-out.7 b/man/SAPHanaController-scale-out.7
new file mode 100644
index 00000000..3aa4bcac
--- /dev/null
+++ b/man/SAPHanaController-scale-out.7
@@ -0,0 +1 @@
+.so man7/ocf_suse_SAPHanaController.7
diff --git a/man/SAPHanaController-scale-up.7 b/man/SAPHanaController-scale-up.7
new file mode 100644
index 00000000..31339779
--- /dev/null
+++ b/man/SAPHanaController-scale-up.7
@@ -0,0 +1 @@
+.so man7/ocf_suse_SAPHana.7
diff --git a/man/SAPHanaFilesystem.7 b/man/SAPHanaFilesystem.7
new file mode 100644
index 00000000..7027fd2c
--- /dev/null
+++ b/man/SAPHanaFilesystem.7
@@ -0,0 +1 @@
+.so man7/ocf_suse_SAPHanaFilesystem.7
diff --git a/man/SAPHanaSR-ScaleOut.7 b/man/SAPHanaSR-ScaleOut.7
index 27e43da6..eb2eed8d 100644
--- a/man/SAPHanaSR-ScaleOut.7
+++ b/man/SAPHanaSR-ScaleOut.7
@@ -1,13 +1,13 @@
.\" Version: 1.001
.\"
-.TH SAPHanaSR-ScaleOut 7 "18 Dec 2023" "" "SAPHanaSR-angi"
+.TH SAPHanaSR-ScaleOut 7 "20 Sep 2024" "" "SAPHanaSR-angi"
.\"
.SH NAME
SAPHanaSR-ScaleOut \- Automating SAP HANA system replication in scale-out setups.
.PP
.\"
.SH DESCRIPTION
-.\"
+.PP
\fBOverview\fR
.PP
This manual page SAPHanaSR-ScaleOut provides information for setting up
@@ -328,16 +328,21 @@ Important is that this feature does not change the HANA topology or interfaces.
In opposite to Native Storage Extension, the HANA Extension Nodes are changing
the topology and thus currently are not supported.
Please refer to SAP documentation for details.
-.PP
+.PP
+27. Manual actions must not be performed on the HANA database while it is controlled
+by the Linux cluster. All administrative actions need to be aligned with the cluster.
+See also SAPHanaSR_maintenance_examples(7).
+.PP
.\"
.SH BUGS
-.\" TODO
+.PP
In case of any problem, please use your favourite SAP support process to open
a request for the component BC-OP-LNX-SUSE.
Please report any other feedback and suggestions to feedback@suse.com.
.PP
.\"
.SH SEE ALSO
+.PP
\fBSAPHanaSR-angi\fP(7) , \fBSAPHanaSR\fP(7) ,
\fBocf_suse_SAPHanaTopology\fP(7) , \fBocf_suse_SAPHanaController\fP(7) ,
\fBocf_heartbeat_IPaddr2\fP(7) , \fBSAPHanaSR-ScaleOut_basic_cluster\fP(7) ,
@@ -371,14 +376,15 @@ https://blogs.sap.com/2020/01/30/sap-hana-and-persistent-memory/
.\" TODO SAP notes 3007062 ...
.PP
.SH AUTHORS
-.br
+.PP
A.Briel, F.Herschel, L.Pinne.
.PP
.\"
.SH COPYRIGHT
+.PP
(c) 2015-2017 SUSE Linux GmbH, Germany.
.br
-(c) 2018-2023 SUSE LLC
+(c) 2018-2024 SUSE LLC
.br
The package SAPHanaSR-angi comes with ABSOLUTELY NO WARRANTY.
.br
diff --git a/man/SAPHanaSR-ScaleOut_basic_cluster.7 b/man/SAPHanaSR-ScaleOut_basic_cluster.7
index 02b44ba1..68380021 100644
--- a/man/SAPHanaSR-ScaleOut_basic_cluster.7
+++ b/man/SAPHanaSR-ScaleOut_basic_cluster.7
@@ -1,13 +1,13 @@
.\" Version: 1.001
.\"
-.TH SAPHanaSR-ScaleOut_basic_cluster 7 "18 Mar 2024" "" "SAPHanaSR"
+.TH SAPHanaSR-ScaleOut_basic_cluster 7 "27 Sep 2024" "" "SAPHanaSR"
.\"
.SH NAME
SAPHanaSR-ScaleOut_basic_cluster \- SAP HANA System Replication scale-out basic cluster configuration.
.PP
.\"
.SH DESCRIPTION
-.\"
+.PP
The SAP HANA System Replication scale-out scenario needs a certain basic
cluster configuration. Besides this necessary settings, some additional
configurations might match specific needs. Adapting a few SAP HANA settings
@@ -15,12 +15,11 @@ might be beneficial as well.
.\"
.\" \fB* Corosync Basics\fR
.\"
-.\".PP
-
+.PP
\fB* CRM Basics\fR
-
+.PP
\fBno-quorum-policy = freeze\fR
-
+.PP
The crm basic parameter no-quorum-policy defines how the cluster should act in
case of quorum loss. With more than two nodes, the cluster must not ignore the
quorum loss. For SAPHanaSR-ScaleOut, an odd number of nodes is required. Setting
@@ -28,9 +27,9 @@ no-quorum-policy to 'freeze' won't allow the partition to shoot any other node
when it doesn't have quorum. Cluster will not be able to add and start new
resources, but running will stay alive.
If the cluster uses disk-less SBD, the no-quorum-policy 'suicide' is required.
-
+.PP
\fBdefault-resource-stickiness = 1000\fR
-
+.PP
The crm basic parameter default-resource-stickiness defines the 'stickiness'
score a resource gets on the node where it is currently running. This prevents
the cluster from moving resources around without an urgent need during a
@@ -40,80 +39,88 @@ HANA primary master resource can affect cluster decisions. Too high value
might prevent not only unwanted but also useful actions. This is because
SAPHanaSR uses an internal scoring table for placing the HANA roles on the
right nodes.
-
+.PP
\fBconcurrent-fencing = true\fR
-
+.PP
The crm basic parameter concurrent-fencing allows the cluster to fence more
than one node at a time. This helps to reduce the time needed for a take over
in case a whole data center is lost. If nodes are fenced one by one, the time
needed would be equal to the \fBnumber of nodes * stonith timeout\fR. With
concurrent-fencing enabled the time needed is in the range of
\fB2 * stonith timeout\fR, independent of the number of nodes.
See also \fBpcmk_action_limit\fR below.
-
+.PP
\fBfailure-timeout = 86400\fR
-
+.PP
The crm basic parameter failure-timeout defines how long failed actions will
be kept in the CIB. After that time the failure record will be deleted. The
time is measured in seconds. See also \fBmigration-threshold\fR below.
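+.PP
+The resulting settings can be verified with crmsh and pacemaker tools, for
+example as sketched below. The property set id cib-bootstrap-options is the
+usual default, adapt it if your configuration differs:
+.PP
+.RS 2
+# crm configure show cib-bootstrap-options
+.br
+# crm_attribute --type crm_config --name concurrent-fencing --query
+.RE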
-
+.PP
\fBmigration-threshold = 50\fR
-
+.PP
The crm basic parameter migration-threshold defines how many errors on a
resource can be detected before this resource will be migrated to another
node. See also \fBfailure-timeout\fR.
-
+.PP
\fBrecord-pending = false\fR
-
+.PP
The op_default record-pending defines, whether the intention of an action upon
the resource is recorded in the Cluster Information Base (CIB). Setting this
parameter to 'true' allows the user to see pending actions like 'starting' and
'stopping' in \fBcrm_mon\fR and \fBHawk\fR.
-
.PP
\fB* SBD STONITH Basics\fR
-
+.PP
\fBpcmk_action_limit = -1\fR
-
+.PP
The sbd stonith parameter pcmk_action_limit defines the maximum number of
concurrent fencing actions. It allows parallel fencing of multiple nodes.
A value of '-1' means virtually unlimited.
See also \fBconcurrent-fencing\fR above.
-
+.PP
\fBpcmk_delay_max = 1s\fR
-
+.PP
The sbd stonith parameter pcmk_delay_max defines an upper limit for waiting
before a fencing/stonith request will be triggerd.
This parameter should prevent the cluster from unwanted double fencing in case
of spilt-brain. A value around 30 seconds is required in two-node clusters.
It is not needed in usual SAPHanaSR-ScaleOut setups.
-
.PP
\fB* systemd Basics\fR
-
+.PP
\fBsaphostagent.service enabled\fR
.br
\fBSAP${SID}_${INO}.service enabled\fR
-
+.PP
In case systemd-style init is used for the HANA database, the services
saphostagent and SAP${SID}_${INO} need to be enabled and running inside
the SAP slice. The instance profile Autostart feature needs to be off.
The service saptune is highly recommended, see manual page saptune(8).
-
+.PP
\fB* pacemaker service dependency to SAP instance service\fR
-
+.PP
\fB[Unit]\fR
.br
\fBWants=SAP${SID}_${INO}.service\fR
.br
\fBAfter=SAP${SID}_${INO}.service\fR
-
+.PP
In case systemd-style init is used for the HANA database, it might be desired
to have the SAP instance service stopping after pacemaker at system shutdown.
Therefor a drop-in file for the pacemaker service might help. See examples
below.
-
+.PP
+\fB* pacemaker service basics\fR
+.PP
+\fBPCMK_fail_fast = yes\fR
+.PP
+The parameter PCMK_fail_fast in /etc/sysconfig/pacemaker specifies how pacemaker
+reacts to failures of its subdaemons. Default "no" means to restart failed
+subdaemons, while "yes" means fencing the node. Setting "yes" might help to avoid
+undefined situations. See also SAPHanaSR-alert-fencing(8).
+.br
+Optional, default no.
.PP
\fB* SAP HANA Basics\fR
-
+.PP
\fB/usr/sap/${SID}/SYS/global/hdb/custom/config/global.ini\fR
.PP
\fB[memorymanager]\fR
@@ -126,7 +133,7 @@ Starting with SAP HANA 2.0 SPS06, the database shutdown can be accelerated by
optimizing memory de-allocation. Please refer to SAP documentation before
setting this parameters.
.\" TODO SAP notes 3405297 ?
-
+.PP
\fB/usr/sap/${SID}/SYS/global/hdb/custom/config/daemon.ini
.PP
\fB[daemon]\fR
@@ -143,13 +150,37 @@ child processes when HANA is shutting down by the QUIT event. See also manual pa
susChkSrv.py(7).
Please refer to SAP documentation before setting this parameters.
.\" TODO check above
-
+.PP
+\fB/hana/shared/${SID}/global/hdb/custom/config/nameserver.ini\fR
+.PP
+\fB[landscape]\fR
+.br
+\fBmaster = \fInode1\fB:31\fInr\fB1\fR
+.br
+\fBworker = \fInode1 node2\fR
+.br
+\fBactive_master = \fInode1\fB:31\fInr\fB1\fR
+.br
+\fBroles_\fInode1\fB = worker\fR
+.br
+\fBroles_\fInode2\fB = worker\fR
+.PP
+For two-node scale-out HANA without standby nodes, this entry is needed at both
+sites that are managed by the Linux cluster.
+The HANA has to be stopped before the files can be edited. Do not copy the file
+between nodes. It might be necessary to un-register and re-register the secondary
+to make the change effective on the secondary site.
+In the example, \fInode1\fR should be the master nameserver´s hostname,
+\fInode2\fR the worker´s hostname, \fInr\fR the instance number. It is crucial to
+define only one master nameserver per site, but no candidates.
+Please check SAP HANA documentation for details.
+.\" In this example the master node is suse11, the worker is suse12. The instance number is 00.
.PP
.\"
.SH EXAMPLES
-
+.PP
\fB* crm basic configuration\fR
-
+.PP
Below is an example crm basic configuration for SAPHanaSR-ScaleOut. Shown
are specific parameters which are needed. Some general parameters are left
out.
.br
@@ -231,9 +262,8 @@ op_defaults op-options: \\
.RE
.PP
.\" TODO example for SLE-HA 15 SP5 with disk-based and diskless SBD.
-
\fB* crm SBD stonith configuration\fR
-
+.PP
To complete the SBD setup, it is necessary to activate SBD as STONITH/fencing
mechanism in the CIB. The SBD is normally used for SAPHanaSR-ScaleOut
instead of any other fencing/stonith mechanism. Example for a basic disk-based
@@ -246,9 +276,8 @@ primitive rsc_stonith_sbd stonith:external/sbd \\
 params pcmk_action_limit="-1" pcmk_delay_max="1"
.RE
.PP
-
\fB* crm simple IP address resource configuration\fR
-
+.PP
Let the Linux cluster manage one IP address and move that address along
with the HANA primary master nameserver.
.PP
@@ -265,9 +294,8 @@ colocation col_ip_with_SLE_HDB00 \\
 2000: rsc_ip_SLE_HDB00:Started mst_SAPHanaCon_SLE_HDB00:Promoted
.RE
.PP
-
\fB* crm IP address for active/active read-enabled resource configuration\fR
-
+.PP
Let the Linux cluster manage an additional IP address and move that address
along with the HANA secondary master nameserver.
.br
@@ -294,9 +322,8 @@ location loc_ip_ro_not_master_SLE_HDB00 \\
.\" TODO works this for multi-node: rule 8000: score eq 100
.RE
.PP
-
\fB* crm grouped IP address resource configuration\fR
-
+.PP
Let the Linux cluster manage one IP address and move that address along
with the HANA primary master nameserver. An auxiliary resource is needed
for specific public cloud purpose.
@@ -322,9 +349,8 @@ colocation col_ip_with_SLE_HDB00 \\
 8000: grp_ip_SLE_HDB00:Started mst_SAPHanaCon_SLE_HDB00:Promoted
.RE
.PP
-
\fB* check how resource stickiness affects promotion scoring\fR
-
+.PP
SAPHanaSR uses an internal scoring table. The promotion scores for HANA
primary and secondary master are in a certain range. The scores used by the
Linux cluster should be in the same range.
@@ -335,9 +361,8 @@ Linux cluster should be in the same range.
# crm_simulate -Ls | grep promotion
.RE
.PP
-
\fB* clean up SDB stonith resource after write failure\fR
-
+.PP
In rare cases the SBD stonith resource failes writing to the block device.
After the root cause has been found and fixed, the failure message can be
cleaned.
@@ -346,9 +371,8 @@ cleaned.
# stonith_admin --cleanup --history=
.RE
.PP
-
\fB* check saphostagent and show SAP instances\fR
-
+.PP
Basic check for the saphostagent.
.PP
.RS 2
@@ -357,9 +381,8 @@ Basic check for the saphostagent.
# /usr/sap/hostctrl/exe/saphostctrl -function ListInstances
.RE
.PP
-
\fB* check systemd services for the HANA database\fR
-
+.PP
In case systemd-style init is used for the HANA database, the services can be
checked. Example SID is HA1, instance number is 10.
.PP
@@ -376,14 +399,12 @@ checked. Example SID is HA1, instance number is 10.
.\" TODO check Autostart not set. .RE .PP - \fB* show pacemaker service drop-in file\fR - +.PP In case systemd-style init is used for the HANA database, it might be desired to have the SAP instance service stopping after pacemaker at system shutdown. A drop-in file might help. Example SID is S07, instance number is 00. - -.pp +.PP .RS 2 # cat /etc/systemd/system/pacemaker.service.d/00-pacemaker.conf .br @@ -398,9 +419,8 @@ Wants=SAPS07_00.service After=SAPS07_00.service .RE .PP - \fB* check for pacemaker dependency to SAP instance service\fR - +.PP Example SID is S07, instance number is 00. .PP .RS 2 @@ -413,12 +433,14 @@ Example SID is S07, instance number is 00. .PP .\" .SH BUGS +.PP In case of any problem, please use your favourite SAP support process to open a request for the component BC-OP-LNX-SUSE. Please report any other feedback and suggestions to feedback@suse.com. .PP .\" .SH SEE ALSO +.PP \fBocf_suse_SAPHanaTopology\fP(7) , \fBocf_suse_SAPHanaController\fP(7) , \fBocf_suse_SAPHanaFilesystem\fP(7) , \fBocf_heartbeat_IPaddr2\fP(7) , \fBsbd\fP(8) , \fBstonith_sbd\fP(7) , \fBstonith_admin\fP(8) , @@ -437,12 +459,14 @@ https://www.suse.com/support/kb/ , .br https://www.clusterlabs.org .PP +.\" .SH AUTHORS -.br +.PP F.Herschel, L.Pinne. .PP .\" .SH COPYRIGHT +.PP (c) 2018 SUSE Linux GmbH, Germany. .br (c) 2019-2024 SUSE LLC diff --git a/man/SAPHanaSR-alert-fencing.8 b/man/SAPHanaSR-alert-fencing.8 index 6a6f91d8..5bb8ec5e 100644 --- a/man/SAPHanaSR-alert-fencing.8 +++ b/man/SAPHanaSR-alert-fencing.8 @@ -1,6 +1,6 @@ -.\" Version: 1.001 +.\" Version: 1.2.4 .\" -.TH SAPHanaSR-alert-fencing 7 "12 Jun 2024" "" "SAPHanaSR" +.TH SAPHanaSR-alert-fencing 7 "18 Sep 2024" "" "SAPHanaSR" .\" .SH NAME SAPHanaSR-alert-fencing \- Alert agent for cluster fencing alerts. @@ -9,16 +9,16 @@ SAPHanaSR-alert-fencing \- Alert agent for cluster fencing alerts. .SH DESCRIPTION SAPHanaSR-alert-fencing can be used to react on Linux cluster fencing alerts. .PP -The Linux cluster provides an interface to initiate external action when a cluster -event occurs (alert). Than the cluster calls an external program (an alert agent) -to handle that alert. +The Linux cluster provides an interface to initiate external actions when a +cluster event occurs (alert). Than the cluster calls an external program (an +alert agent) to handle that alert. .PP When the Linux cluster has performed an node fencing, it can call SAPHanaSR-alert-fencing on each active cluster node. The agent checks whether the local node belongs to the same HANA site as the fenced node. If so, it asks the cluster to fence the local node as well. .PP -This improves three use cases for HANA scale-out: +This improves four use cases for HANA scale-out: .br - HA/DR provider hook script susChkSrv.py action_on_lost=fence .br @@ -26,8 +26,10 @@ This improves three use cases for HANA scale-out: .br - resource agent SAPHanaFilesystem ON_FAIL_ACTION=fence .br -See also manual pages ocf_sus_SAPHanaController(7), ocf_suse_SAPHanaFilesystem(7) -and susChkSrv.py(7). +- pacemaker service PCMK_fail_fast=yes +.br +See also manual pages ocf_suse_SAPHanaController(7), ocf_suse_SAPHanaFilesystem(7), +SAPHanaSR-ScaleOut_basic_cluster(7) and susChkSrv.py(7). .PP .\" .SH SUPPORTED PARAMETERS @@ -111,6 +113,16 @@ Note: Understand the impact before trying. # crm node fence node1 .RE .PP +\fB*\fR Example for sudo permissions in /etc/sudoers.d/SAPHanaSR . +.PP +See also manual page sudoers(5). 
+.PP
+.RS 2
+# SAPHanaSR-alert-fencing needs
+.br
+hacluster ALL=(ALL) NOPASSWD: /usr/sbin/crm --force node fence *
+.RE
+.PP
.\"
.SH FILES
.TP
@@ -122,6 +134,9 @@ the internal cache for host to site relation - do not touch this file
.TP
/etc/sysconfig/sbd
config file for SBD daemon
+.TP
+/etc/sysconfig/pacemaker
+config file for pacemaker daemon
.PP
.\"
.SH REQUIREMENTS
@@ -136,17 +151,23 @@ thus no standby nodes.
.PP
5. No other alert agent should be configured for the fencing alert.
.PP
-6. SAPHanaFilesystem RA with monitor operations is active.
+6. User hacluster is member of group haclient. Both are defined locally on each cluster node.
+.PP
+7. User hacluster needs password-less sudo permission on "/usr/sbin/crm --force node fence *".
+.PP
+8. Concurrent fencing is configured, see manual page SAPHanaSR-ScaleOut_basic_cluster(7).
+.PP
+9. SAPHanaFilesystem RA with monitor operations is active.
.PP
-7. Automatic restart of just fenced nodes should be disabled by adapting
+10. Automatic restart of just fenced nodes should be disabled by adapting
SBD_START_MODE. In case of automatic restart of just fenced nodes, it might be
necessary to adapt SBD_START_DELAY in order to avoid fencing loops. See manual
page sbd(8).
.PP
-8. Fencing is executed unconditionally. The alert agent relies on the preceding
-fencing decision. Neither site role nor SR state is checked.
+11. The alert agent unconditionally executes fencing. The alert agent relies on
+the preceding fencing decision. Neither site role nor SR state is checked.
.PP
-9. The alert agent runtime almost completely depends on call-outs to OS and
+12. The alert agent runtime almost completely depends on call-outs to OS and
Linux cluster.
.\"
.SH BUGS
@@ -158,7 +179,7 @@ Please report any other feedback and suggestions to feedback@suse.com.
.SH SEE ALSO
\fBSAPHanaSR-angi\fP(7) , \fBSAPHanaSR-ScaleOut\fP(7) ,
\fBocf_suse_SAPHanaController\fP(7) , \fBocf_suse_SAPHanaFilesystem\fP(7) ,
-\fBsusChkSrv.py\fP(7) , \fBcrm\fP(8) , \fBsbd\fP(8) ,
+\fBsusChkSrv.py\fP(7) , \fBcrm\fP(8) , \fBsbd\fP(8) , \fBsudoers\fP(5) ,
.br
https://clusterlabs.org/pacemaker/doc/2.1/Pacemaker_Administration/singlehtml/#alert-agents
.PP
diff --git a/man/SAPHanaSR-angi-scenarios.7 b/man/SAPHanaSR-angi-scenarios.7
new file mode 100644
index 00000000..e5eea42a
--- /dev/null
+++ b/man/SAPHanaSR-angi-scenarios.7
@@ -0,0 +1,188 @@
+.\" Version: 1.2
+.\"
+.TH SAPHanaSR-angi-scenarios 7 "28 Oct 2024" "" "SAPHanaSR-angi"
+.\"
+.SH NAME
+SAPHanaSR-angi-scenarios \- SAP HANA system replication scenarios.
+.PP
+.\"
+.SH DESCRIPTION
+.PP
+SAPHanaSR-angi covers two topologies, scale-up and scale-out. For each topology,
+several scenarios are supported. For most scenarios, two variants are
+possible.
+.PP
+\fB* Scenarios overview\fR
+.PP
+The table below shows the known SUSE HA scenarios for HANA system replication
+for the two HANA topologies. The current support status is also shown.
+.PP
+.\" see man tbl and https://technicallywewrite.com/2023/09/23/tblexample
+.TS
+tab(@) allbox center;
+cb cb cb
+c c c
+^ c ^
+^ c ^
+^ c c
+^ c ^
+^ c ^
+^ c ^
+^ c c
+^ c ^
+c c c
+^ c ^
+^ c ^
+^ c c
+^ c c
+^ c ^
+^ c ^
+^ c ^.
+Topology@Scenario@Status
+Scale-Up@perf-opt@Supported
+@perf-opt, 2nd site read-enabled@Supported
+@perf-opt, multi-target, 3rd site outside cluster@Supported
+@perf-opt, multi-SID@Undocumented
+@perf-opt, w. S/4 ENSA2 in same cluster@Undocumented
+@cost-opt@Undocumented
+@cost-opt, multi-target@Non-supported
+@perf-opt, multi-target, 3rd site inside cluster@Non-supported
+@two perf-opt clusters connected@Non-supported
+Scale-Out@perf-opt, up to 12 nodes, no standby (BW)@Supported
+@perf-opt, 4 nodes, 2nd site read-enabled (ERP)@Supported
+@perf-opt, multi-target, 3rd site outside cluster@Supported
+@perf-opt, up to 30 nodes w. standby (BW)@Undocumented
+@perf-opt, multi-target, 3rd site inside cluster@Non-supported
+@perf-opt, multi-SID@Non-supported
+@cost-opt@Non-supported
+@two perf-opt clusters connected@Non-supported
+.TE
+.PP
+.RS 8
+Note: One additional Linux cluster node at 3rd site is needed for all scale-out scenarios.
+.RE
+.\" TODO align wording with "Supported HA Solutions"
+.PP
+\fBSupported\fP - the scenario is known to work for the given topology. The
+setup has been tested and is documented in a setup guide. It is supported by
+SUSE.
+.PP
+\fBUndocumented\fP - the scenario is expected to work for the given topology.
+The setup is currently not documented in a setup guide. SUSE services are prepared to help with implementation. Once the setup is working, it is supported by
+SUSE.
+.PP
+\fBNon-supported\fP - the scenario is expected not to work. It is not supported
+by SUSE.
+.PP
+For details on requirements and configuration of the scenarios, please refer to manual pages SAPHanaSR-angi(7), SAPHanaSR(7) and SAPHanaSR-ScaleOut(7), as well as the respective setup guides.
+.PP
+\fB* Scenario notation\fR
+.PP
+It might help to describe scenarios in a pseudo-visualising notation.
+.PP
+.TS
+tab(@) allbox center;
+cb cb
+c c
+c c
+c c
+c c
+c c
+c c
+c c
+c c.
+Symbol@Meaning
+[ ]@Linux cluster
+ A B C@master nameserver node
+ a b c@worker node
+ _ @ standby node
+=>@synchronous replication
+->@asynchronous replication
+'@primary IP address
+"@secondary (read-enabled) IP address
+.TE
+
+.PP
+The scale-up performance-optimised multi-target scenario can be noted as:
+.br
+[ A' => B ] -> C
+.PP
+\fB* Variants overview\fR
+.PP
+.\" TODO variants conservative, progressive
+SAPHanaSR-angi allows defining different variants of reacting to failures.
+.PP
+\fBConservative\fR – default configuration
+.br
+The cluster is patient; it prefers stopping HANA over fencing nodes. It does
+not react to filesystem failures.
+.PP
+\fBProgressive\fR – alternative configuration
+.br
+The cluster reacts to failures of HANA or the filesystem by fencing all nodes
+of the affected site. Takeover time might be further reduced by using diskless SBD.
+.PP
+For details on configuration of the variants, please refer to manual pages
+ocf_suse_SAPHanaController(7), ocf_suse_SAPHana(7), ocf_suse_SAPHanaFilesystem(7), SAPHanaSR-alert-fencing(8), susChkSrv.py(7),
+SAPHanaSR_basic_cluster(7), SAPHanaSR-ScaleOut_basic_cluster(7).
+.PP
+.\"
+.SH EXAMPLES
+.PP
+\fB* Examples for supported scenarios\fR
+.TP
+Scale-up performance-optimised multi-target
+[ A' => B ] -> C
+.TP
+Scale-up cost-optimised
+[ A' => B,Q ]
+.TP
+Scale-out performance-optimised multi-target read-enabled (ERP)
+[ 'Aa => "Bb ] -> Cc -> Dd
+.TP
+Scale-out performance-optimised 8-node w. 
standby (BW) +[ 'Aaaaaaa_ => Bbbbbbb_ ] +.PP +\fB* Examples for non-supported scenarios\fR +.TP +Scale-up performance-optimised multi-target 3rd site in cluster +[ A' => B => C ] +.TP +Scale-out performance-optimised two clusters connected +[ 'Aa => Bb ] -> [ 'Cc => Dd ] +.PP +.\" +.SH BUGS +.PP +In case of any problem, please use your favourite SAP support process to open +a request for the component BC-OP-LNX-SUSE. +Please report any other feedback and suggestions to feedback@suse.com. +.PP +.\" +.SH SEE ALSO +.PP +\fBSAPHanaSR-angi\fP(7) , \fBSAPHanaSR\fP(7) , \fBSAPHanaSR-ScaleOut\fP(7) , +\fBocf_suse_SAPHanaController\fP(7) , \fBocf_suse_SAPHana\fP(7) , +\fBocf_suse_SAPHanaFilesystem\fP(7) , \fBSAPHanaSR-alert-fencing\fP(8) , +\fBsusChkSrv.py\fP(7) , \fBSAPHanaSR_basic_cluster\fP(7) , +\fBSAPHanaSR-ScaleOut_basic_cluster\fP(7) , +.br +https://documentation.suse.com/sles-sap/sap-ha-support/html/sap-ha-support/article-sap-ha-support.html , +.br +https://documentation.suse.com/sbp/sap-15/ +.PP +.\" +.SH AUTHORS +.PP +A.Briel, F.Herschel, L.Pinne. +.PP +.\" +.SH COPYRIGHT +.PP +(c) 2024 SUSE LLC +.br +The package SAPHanaSR-angi comes with ABSOLUTELY NO WARRANTY. +.br +For details see the GNU General Public License at +http://www.gnu.org/licenses/gpl.html +.\" diff --git a/man/SAPHanaSR-angi.7 b/man/SAPHanaSR-angi.7 index 3e8fe6e8..414ebcfb 100644 --- a/man/SAPHanaSR-angi.7 +++ b/man/SAPHanaSR-angi.7 @@ -1,6 +1,6 @@ -.\" Version: 1.001 +.\" Version: 1.2 .\" -.TH SAPHanaSR 7 "08 May 2023" "" "SAPHanaSR-angi" +.TH SAPHanaSR 7 "04 Nov 2024" "" "SAPHanaSR-angi" .\" .SH NAME SAPHanaSR-angi \- SAP HANA SR - Advanced Next Generation Interface. @@ -37,8 +37,6 @@ The following SAP HANA SR scenarios are possible with SAPHanaSR-angi: * scale-out single-tenant or multi-tenant (MDC low isolation) for all the above .RE .PP -.B SAPHanaSR-angi is shipped as technology preview. -.PP .\" .SH REQUIREMENTS Please find information on specific requirements of Linux HA scenarios for SAP HANA @@ -61,6 +59,7 @@ Please report any other feedback and suggestions to feedback@suse.com. .\" .SH SEE ALSO \fBSAPHanaSR\fP(7) , \fBSAPHanaSR-ScaleOut\fP(7) , +\fBSAPHanaSR-angi-scenarios\fP(7) , \fBocf_suse_SAPHanaTopology\fP(7) , \fBocf_suse_SAPHanaController\fP(7) , \fBocf_heartbeat_IPaddr2\fP(7) , \fBSAPHanaSR_basic_cluster\fP(7) , @@ -102,7 +101,7 @@ A.Briel, F.Herschel, L.Pinne. .\" .SH COPYRIGHT .br -(c) 2022-2023 SUSE LLC +(c) 2022-2024 SUSE LLC .br The package SAPHanaSR-angi comes with ABSOLUTELY NO WARRANTY. .br diff --git a/man/SAPHanaSR-showAttr.8 b/man/SAPHanaSR-showAttr.8 index 438db263..cb023bd9 100644 --- a/man/SAPHanaSR-showAttr.8 +++ b/man/SAPHanaSR-showAttr.8 @@ -1,6 +1,6 @@ .\" Version: 1.001 .\" -.TH SAPHanaSR-showAttr 8 "24 Jan 2024" "" "SAPHanaSR" +.TH SAPHanaSR-showAttr 8 "09 Jul 2024" "" "SAPHanaSR" .\" .SH NAME SAPHanaSR-showAttr \- Shows Linux cluster attributes for SAP HANA system replication. @@ -532,7 +532,7 @@ show version. .TP 4 \fB --select\fR \fISELECT\fR .\" TODO explain meaning of values -show selected information only. Allowed values: [ all | default | minimal | sr | cluster | cluster2 | cluster3 | skitelist ]. Default is default. +show selected information only. Allowed values: [ all | default | minimal | sr | cluster | cluster2 | cluster3 | sitelist ]. Default is default. .TP 4 \fB --sid\fR \fISID\fR use SAP system ID \fISID\fR. Should be autodetected, if there is only one SAP HANA instance installed on the local cluster node. 
The SAP system ID is a 3 alphanum string with a valid SAP system name like SLE, HAE, FH1, C11, or P42.
diff --git a/man/SAPHanaSR-upgrade-to-angi-demo.8 b/man/SAPHanaSR-upgrade-to-angi-demo.8
index ca9c72ca..a384420e 100644
--- a/man/SAPHanaSR-upgrade-to-angi-demo.8
+++ b/man/SAPHanaSR-upgrade-to-angi-demo.8
@@ -62,7 +62,7 @@ The script needs to be still available on both cluster nodes after the
SAPHanaSR RPM has been removed. Needs to be done on all cluster nodes.
.PP
.RS 2
-# cp -a /usr/shared/SAPHanaSR/samples/SAPHanaSR-upgrade-to-angi-demo /root/bin/
+# cp -a /usr/share/SAPHanaSR/samples/SAPHanaSR-upgrade-to-angi-demo /root/bin/
.br
# chmod 755 /root/bin/SAPHanaSR-upgrade-to-angi-demo
.br
diff --git a/man/SAPHanaSR.7 b/man/SAPHanaSR.7
index 68b6103e..200dd0a5 100644
--- a/man/SAPHanaSR.7
+++ b/man/SAPHanaSR.7
@@ -1,6 +1,6 @@
.\" Version: 1.001
.\"
-.TH SAPHanaSR 7 "18 Dec 2023" "" "SAPHanaSR-angi"
+.TH SAPHanaSR 7 "20 Sep 2024" "" "SAPHanaSR-angi"
.\"
.SH NAME
SAPHanaSR \- Automating SAP HANA system replication in scale-up setups.
@@ -295,15 +295,20 @@ Please refer to SAP documentation for details.
.PP
25. The Linux user root´s shell is /bin/bash, or completely compatible.
.PP
+26. Manual actions must not be performed on the HANA database while it is controlled
+by the Linux cluster. All administrative actions need to be aligned with the cluster.
+See also SAPHanaSR_maintenance_examples(7).
+.PP
.\"
.SH BUGS
-.\" TODO
+.PP
In case of any problem, please use your favourite SAP support process to open
a request for the component BC-OP-LNX-SUSE.
Please report any other feedback and suggestions to feedback@suse.com.
.PP
.\"
.SH SEE ALSO
+.PP
\fBSAPHanaSR-angi\fP(7) , \fBSAPHanaSR-ScaleOut\fP(7) ,
\fBocf_suse_SAPHanaTopology\fP(7) , \fBocf_suse_SAPHanaController\fP(7) ,
\fBocf_heartbeat_IPaddr2\fP(7) , \fBSAPHanaSR_basic_cluster\fP(7) ,
@@ -340,11 +345,12 @@ https://blogs.sap.com/2020/01/30/sap-hana-and-persistent-memory/
.PP
.\"
.SH AUTHORS
-.br
+.PP
A.Briel, F.Herschel, L.Pinne.
.PP
.\"
.SH COPYRIGHT
+.PP
(c) 2015-2017 SUSE Linux GmbH, Germany.
.br
(c) 2018-2024 SUSE LLC
diff --git a/man/SAPHanaSR_basic_cluster.7 b/man/SAPHanaSR_basic_cluster.7
index 5ff08b17..8e9a063a 100644
--- a/man/SAPHanaSR_basic_cluster.7
+++ b/man/SAPHanaSR_basic_cluster.7
@@ -1,13 +1,13 @@
.\" Version: 1.001
.\"
-.TH SAPHanaSR 7 "18 Mar 2024" "" "SAPHanaSR_basic_cluster"
+.TH SAPHanaSR 7 "27 Sep 2024" "" "SAPHanaSR_basic_cluster"
.\"
.SH NAME
SAPHanaSR_basic_cluster \- SAP HANA System Replication scale-up basic cluster configuration.
.PP
.\"
.SH DESCRIPTION
-.\"
+.PP
The SAP HANA System Replication scale-up scenario needs a certain basic
cluster configuration. Besides this necessary settings, some additional
configurations might match specific needs. Adapting a few SAP HANA settings
@@ -15,12 +15,11 @@ might be beneficial as well.
.\"
.\" \fB* Corosync Basics\fR
.\"
-.\".PP
-
+.PP
\fB* CRM Basics\fR
-
+.PP
\fBdefault-resource-stickiness = 1000\fR
-
+.PP
The crm basic parameter default-resource-stickiness defines the 'stickiness'
score a resource gets on the node where it is currently running. This prevents
the cluster from moving resources around without an urgent need during a
@@ -32,35 +31,34 @@ This is because SAPHanaSR uses an internal scoring table for placing the
HANA roles on the right nodes.
.br
Mandatory, default 1.
-
+.PP
\fBfailure-timeout = 86400\fR
-
+.PP
The crm basic parameter failure-timeout defines how long failed actions will
be kept in the CIB. After that time the failure record will be deleted.
The time is measured in seconds. See also migration-threshold below.
.br
Optional, no default.
-
+.PP
\fBmigration-threshold = 5000\fR
-
+.PP
The crm basic parameter migration-threshold defines how many errors on a
resource can be detected before this resource will be migrated to another
node. See also \fBfailure-timeout\fR.
.br
Mandatory, default 3.
-
+.PP
\fBrecord-pending = true\fR
-
+.PP
The op_default record-pending defines, whether the intention of an action upon
the resource is recorded in the Cluster Information Base (CIB). Setting this
parameter to 'true' allows the user to see pending actions like 'starting' and
'stopping' in \fBcrm_mon\fR and \fBHawk\fR.
.br
Optional, default false.
-
.PP
\fBpcmk_delay_max = 30\fR
-
+.PP
The sbd stonith parameter pcmk_delay_max defines an upper limit for waiting
before a fencing/stonith request will be triggerd.
This parameter should prevent the cluster from unwanted double fencing in case
of spilt-brain. A value around 30 seconds is required in two-node clusters,
except priority fencing is used.
.br
Mandatory, default 5.
-
.PP
\fBpriority-fencing-delay = 30\fR
-
+.PP
The optional crm property priority-fencing-delay specified delay for the
fencings that are targeting the lost nodes with the highest total resource
priority in case we do not have the majority of the nodes in our cluster
@@ -86,34 +83,43 @@ The delay should be significantly greater than, or safely twice,
pcmk_delay_max.
.br
Optional, no default.
-
.PP
\fB* systemd Basics\fR
-
+.PP
\fBsaphostagent.service enabled\fR
.br
\fBSAP${SID}_${INO}.service enabled\fR
-
+.PP
In case systemd-style init is used for the HANA database, the services
saphostagent and SAP${SID}_${INO} need to be enabled and running inside
the SAP slice. The instance profile Autostart feature needs to be off.
The service saptune is highly recommended, see manual page saptune(8).
-
+.PP
\fB* pacemaker service dependency to SAP instance service\fR
-
+.PP
\fB[Unit]\fR
.br
\fBWants=SAP${SID}_${INO}.service\fR
.br
\fBAfter=SAP${SID}_${INO}.service\fR
-
+.PP
In case systemd-style init is used for the HANA database, it might be desired
to have the SAP instance service stopping after pacemaker at system shutdown.
Therefor a drop-in file for the pacemaker service might help. See examples
below.
-
+.PP
+\fB* pacemaker service basics\fR
+.PP
+\fBPCMK_fail_fast = yes\fR
+.PP
+The parameter PCMK_fail_fast in /etc/sysconfig/pacemaker specifies how pacemaker
+reacts to failures of its subdaemons. Default "no" means to restart failed
+subdaemons, while "yes" means fencing the node. Setting "yes" might help to avoid
+undefined situations.
+.br
+Optional, default no.
.PP
\fB* SAP HANA Basics\fR
-
+.PP
\fB/usr/sap/${SID}/SYS/global/hdb/custom/config/global.ini\fR
.PP
\fB[memorymanager]\fR
@@ -126,8 +132,8 @@ Starting with SAP HANA 2.0 SPS06, the database shutdown can be accelerated by
optimizing memory de-allocation. Please refer to SAP documentation before
setting this parameters.
.\" TODO SAP notes 3405297 ?
-
-\fB/usr/sap/${SID}/SYS/global/hdb/custom/config/daemon.ini
+.PP
+\fB/usr/sap/${SID}/SYS/global/hdb/custom/config/daemon.ini\fR
.PP
\fB[daemon]\fR
.br
@@ -142,16 +148,15 @@ The second defines the timeout from sending the SIGTERM to finally terminating
child processes when HANA is shutting down by the QUIT event.
Please refer to SAP documentation before setting this parameters.
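+.PP
+The current on-disk settings can be reviewed before changing anything, for
+example as sketched below. SID HA1 is used as example, adapt it to your
+installation:
+.PP
+.RS 2
+# grep PCMK_fail_fast /etc/sysconfig/pacemaker
+.br
+# su - ha1adm
+.br
+~> cat /usr/sap/HA1/SYS/global/hdb/custom/config/global.ini
+.br
+~> cat /usr/sap/HA1/SYS/global/hdb/custom/config/daemon.ini
+.RE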
.\" TODO check above - .PP .\" .SH EXAMPLES - +.PP \fB* crm basic configuration\fR - +.PP Below is an example crm basic configuration for SAPHanaSR. Shown are specific parameters which are needed. Some general parameters are left out. - +.PP The following example is for 15 SP5 with disk-based SBD: .PP .RS 2 @@ -186,9 +191,8 @@ op_defaults op-options: \\ record-pending=true .RE .PP - \fB* crm simple SBD stonith configuration\fR - +.PP To complete the SBD setup, it is necessary to activate SBD as STONITH/fencing mechanism in the CIB. The SBD is normally used for SAPHanaSR scale-up instead of any other fencing/stonith mechanism. Example for a basic disk-based @@ -200,9 +204,8 @@ primitive rsc_stonith_sbd stonith:external/sbd \\ params pcmk-delay-max=30 .RE .PP - \fB* crm priority fencing SBD stonith configuration\fR - +.PP .\" TODO priority fencing for two-node cluster, rsc_SAPHana_... meta priority=100 Example for a priority fencing disk-based SBD resource. .PP @@ -217,9 +220,8 @@ property cib-bootstrap-options: \\ priority-fencing-delay=30 .RE .PP - \fB* crm simple IP address resource configuration\fR - +.PP Let the Linux cluster manage one IP address and move that address along with the HANA primary master nameserver. .PP @@ -237,9 +239,8 @@ colocation col_ip_with_SLE_HDB00 \\ .RE .PP .\" TODO seamless maintenance IP location - \fB* crm IP address for active/active read-enabled resource configuration\fR - +.PP Let the Linux cluster manage an additional IP address and move that address along with the HANA secondary master nameserver. .\" TODO multi-node see below @@ -264,13 +265,12 @@ location loc_ip_ro_not_master_SLE_HDB00 \\ .\" TODO works this for multi-node: rule 8000: score eq 100 .RE .PP - \fB* crm grouped IP address resource configuration\fR - +.PP Let the Linux cluster manage one IP address and move that address along with the HANA primary master nameserver. An auxiliary resource is needed for specific public cloud purpose. - +.PP You should not bind resource to the HANA master role. This would change the effective resource scoring and might prevent the cluster from taking expected actions. If, for any reason, you need to bind additional resource to the @@ -297,16 +297,15 @@ colocation col_ip_with_SLE_HDB00 \\ 8000: grp_ip_SLE_HDB00:Started mst_SAPHanaCon_SLE_HDB00:Promoted .RE .PP - \fB* crm MailTo resource configuration\fR - +.PP The HANA landscape status is stored inside CIB as attribute hana__roles. A healthy HANA master looks like "4:P:master1:master:worker:master". First field is the HANA landscape status. If that status goes to 3 or 2, something has happened to HANA, but the cluster will not perform a takeover. Status 1 will trigger a takeover, status 0 indicates an undefined fatal failure. See manual pages ocf_suse_SAPHanaController(7) and ocf_heartbeat_MailTo(7). - +.PP You could define a MailTo resource that informs you as soon as attribute hana__roles deviates from above ideal: .PP @@ -322,9 +321,8 @@ location loc_mailto_HA1_HDB10_with_prim rsc_mailto_HA1_HDB10 \\ rule hana_ha1_roles eq 4:P:master1:master:worker:master .RE .PP - \fB* check how resource stickiness affects promotion scoring\fR - +.PP SAPHanaSR uses an internal scoring table. The promotion scores for HANA primary and secondary master are in a certain range. The scores used by the Linux cluster should be in the same range. @@ -336,9 +334,8 @@ Linux cluster should be in the same range. 
# crm_simulate -Ls | grep promotion .RE .PP - \fB* clean up SDB stonith resource after write failure\fR - +.PP In rare cases the SBD stonith resource fails writing to the block device. After the root cause has been found and fixed, the failure message can be cleaned. @@ -348,9 +345,8 @@ cleaned. # stonith_admin --cleanup --history= .RE .PP - \fB* check saphostagent and show SAP instances\fR - +.PP Basic check for the saphostagent. .PP .RS 2 @@ -359,9 +355,8 @@ Basic check for the saphostagent. # /usr/sap/hostctrl/exe/saphostctrl -function ListInstances .RE .PP - \fB* check systemd services for the HANA database\fR - +.PP In case systemd-style init is used for the HANA database, the services can be checked. Example SID is HA1, instance number is 10. .PP @@ -379,14 +374,12 @@ checked. Example SID is HA1, instance number is 10. .\" TODO check Autostart not set. .RE .PP - \fB* show pacemaker service drop-in file\fR - +.PP In case systemd-style init is used for the HANA database, it might be desired to have the SAP instance service stopping after pacemaker at system shutdown. A drop-in file might help. Example SID is S07, instance number is 00. - -.pp +.PP .RS 2 # cat /etc/systemd/system/pacemaker.service.d/00-pacemaker.conf .br @@ -401,9 +394,8 @@ Wants=SAPS07_00.service After=SAPS07_00.service .RE .PP - \fB* check for pacemaker dependency to SAP instance service\fR - +.PP Example SID is S07, instance number is 00. .PP .RS 2 @@ -416,12 +408,14 @@ Example SID is S07, instance number is 00. .PP .\" .SH BUGS +.PP In case of any problem, please use your favourite SAP support process to open a request for the component BC-OP-LNX-SUSE. Please report any other feedback and suggestions to feedback@suse.com. .PP .\" .SH SEE ALSO +.PP \fBocf_suse_SAPHanaTopology\fP(7) , \fBocf_suse_SAPHanaController\fP(7) , \fBocf_suse_SAPHanaFilesystem\fP(7) , \fBocf_heartbeat_IPaddr2\fP(7) , \fBocf_heartbeat_MailTo\fP(7) , @@ -442,11 +436,12 @@ https://www.suse.com/support/kb/ , https://www.clusterlabs.org .PP .SH AUTHORS -.br +.PP A.Briel, F.Herschel, L.Pinne. .PP .\" .SH COPYRIGHT +.PP (c) 2018 SUSE Linux GmbH, Germany. .br (c) 2019-2024 SUSE LLC diff --git a/man/SAPHanaSR_maintenance_examples.7 b/man/SAPHanaSR_maintenance_examples.7 index f653efaa..bbba9086 100644 --- a/man/SAPHanaSR_maintenance_examples.7 +++ b/man/SAPHanaSR_maintenance_examples.7 @@ -1,6 +1,6 @@ -.\" Version: 1.001 +.\" Version: 1.001 .\" -.TH SAPHanaSR_maintenance_examples 7 "08 May 2024" "" "SAPHanaSR" +.TH SAPHanaSR_maintenance_examples 7 "20 Sep 2024" "" "SAPHanaSR" .\" .SH NAME SAPHanaSR_maintenance_examples \- maintenance examples for SAPHanaController. @@ -19,7 +19,7 @@ below. .SH EXAMPLES .PP \fB*\fR Check status of Linux cluster and HANA system replication pair. - +.PP This steps should be performed before doing anything with the cluster, and after something has been done. See also cs_show_saphanasr_status(8) and section REQUIREMENTS below. @@ -37,7 +37,7 @@ REQUIREMENTS below. .RE .PP \fB*\fR Watch status of HANA cluster resources and system replication. - +.PP This might be convenient when performing administrative actions or cluster tests. It does not replace the afore mentioned checks. See also cs_show_saphanasr_status(8). .PP .RS 4 @@ -45,7 +45,7 @@ This might be convenient when performing administrative actions or cluster tests .RE .PP \fB*\fR Overview on stopping the HANA database at one site. - +.PP This procedure does work for scale-up and scale-out. No takeover will be done. 
This procedure should be used, when it is necessary to stop the HANA database. Stopping the HANA database should not be done by just stopping the Linux cluster or shutting down the OS. This particularly @@ -70,7 +70,7 @@ tuning and stopping an HANA database. Note: Do not forget to end the resource maintenance after you have re-started the HANA database. .PP \fB*\fR Initiate an administrative takeover of the HANA primary from one node to the other by using the Linux cluster. - +.PP This procedure does not work for scale-out. On scale-up, it will stop the HANA primary. This might take a while. If you want to avoid waiting for the stopped primary, use the below procedure which suspends the primary. @@ -105,7 +105,7 @@ After takeover of the primary has been finished, the migration rule has to be de Note: Former versions of the Linux cluster used "migrate" instead of "move" and "unmigrate" instead of "clear". .PP \fB*\fR Perform an SAP HANA takeover by using SAP tools. - +.PP The procedure is described here for scale-out. It works for scale-up as well. The procedure will stop the HANA primary. This might take a while. If you want to avoid waiting for the stopped primary, use the below procedure which suspends @@ -186,7 +186,7 @@ If everything looks fine, proceed. .br ~> cdpy; python3 ./systemReplicationStatus.py; echo RC:$? .br -~> cdpy; python3 ./landscapeConfigurationStatus.py; echo RC:$? +~> cdpy; python3 ./landscapeHostConfiguration.py; echo RC:$? .br ~> exit .br @@ -211,7 +211,7 @@ If everything looks fine, proceed. .RE .PP \fB*\fR Overview on SAP HANA takeover using SAP tools and suspend primary feature. - +.PP The procedure works for scale-up and scale-out. The status of HANA databases, system replication and Linux cluster has to be checked. @@ -251,7 +251,7 @@ site name. .RE .PP \fB*\fR Check the two site names that are known to the Linux cluster. - +.PP This is useful in case AUTOMATED_REGISTER is not yet set. In that case a former primary needs to be registered manually with the former site name as new secondary. The point is finding the site name that already is in use by the Linux cluster. That exact site name has to be used for registration of the new secondary. See also REQUIREMENTS of SAPHanaSR(7) and SAPHanaSR-ScaleOut(7). .br In this example, node is suse11 on the future secondary site to be registered. Remote HANA master nameserver is suse21 on current primary site. Lowercase-SID is ha1. @@ -269,7 +269,7 @@ In this example, node is suse11 on the future secondary site to be registered. R .RE .PP \fB*\fR Manually start the HANA primary if only one site is available. - +.PP This might be necessary in case the cluster can not detect the status of both sites. This is an advanced task. .PP @@ -298,7 +298,7 @@ This is an advanced task. .RE .PP \fB*\fR Start Linux cluster after node has been fenced. - +.PP It is recommended to not configure the Linux cluster for always starting autmatically on boot. Better is to start automatically only, if cluster and/or node have been stopped cleanly. If the node has been rebooted by STONITH, the @@ -316,9 +316,11 @@ STONITH. STONITH via SBD is used in this example. # crm_mon -r .RE .PP -.\" +.\" \fB*\fR Register secondary HANA after takeover. +.\" TODO for AUTOMATED_REGISTER=false +.\" \fB*\fR Overview on maintenance procedure for Linux, HANA remains running, on pacemaker-2.0. - +.PP It is necessary to wait for each step to complete and to check the result. It also is necessary to test and document the whole procedure before applying in production. 
See also section REQUIREMENTS below and example on checking status @@ -378,7 +380,7 @@ of HANA and cluster above. .PP \fB*\fR Overview on simple procedure for stopping and temporarily disabling the Linux cluster, HANA gets fully stopped. - +.PP This procedure can be used to update HANA, OS or hardware. HANA roles and resource status remains unchanged. It is necessary to wait for each step to complete and to check the result. @@ -415,12 +417,12 @@ It also is necessary to test and document the whole procedure before applying in .br - system replication recovers to SOK .RE - +.PP Note: HANA is not available from step 4 to step 9. .RE .PP \fB*\fR Overview on update procedure for the SAPHanaSR-angi package. - +.PP This procedure can be used to update RAs, HANA HADR provider hook scripts and related tools while HANA and Linux cluster stay online. See also SAPHanaSR-manageAttr(8) for details on reloading the HANA HADR provider. .PP .RS 2 @@ -440,7 +442,7 @@ This procedure can be used to update RAs, HANA HADR provider hook scripts and re .RE .PP \fB*\fR Remove left-over maintenance attribute from overall Linux cluster. - +.PP This could be done to avoid confusion caused by different maintenance procedures. See above overview on maintenance procedures with running Linux cluster. Before doing so, check for cluster attribute maintenance-mode="false". @@ -456,7 +458,7 @@ Before doing so, check for cluster attribute maintenance-mode="false". .RE .PP \fB*\fR Remove left-over standby attribute from Linux cluster nodes. - +.PP This could be done to avoid confusion caused by different maintenance procedures. See above overview on maintenance procedures with running Linux cluster. Before doing so for all nodes, check for node attribute standby="off" on all nodes. @@ -472,7 +474,7 @@ Before doing so for all nodes, check for node attribute standby="off" on all nod .RE .PP \fB*\fR Remove left-over maintenance attribute from resource. - +.PP This should usually not be needed. See above overview on maintenance procedures with running Linux cluster. .PP @@ -485,12 +487,12 @@ See above overview on maintenance procedures with running Linux cluster. .RE .PP \fB*\fR Manually update global site attribute. - +.PP .\" TODO: attributes still used for angi? In rare cases the global site attribute hana__glob_prim or hana__glob_sec is not updated automatically after successful takeover, while all other attributes are updated correctly. The global site attribute -stays outdated even after the cluster has been idle for a while. +stays outdated even after the Linux cluster has been idle for a while. In this case, that site attribute could be updated manually. Make sure everything else is fine and just the global site attribute has not been updated. Updating hana__glob_sec for SID HA1 with site name VOLKACH: @@ -504,7 +506,7 @@ been updated. Updating hana__glob_sec for SID HA1 with site name VOLKACH: .RE .PP \fB*\fR Upgrade scale-out srHook attribute from old-style to multi-target. - +.PP As final result of this upgrade, the RAs and hook script are upgraded from old-style to multi-target. Further the Linux cluster's old-style global srHook attribute hana_${sid}_glob_srHook is replaced by site-aware attributes @@ -542,7 +544,6 @@ l. Finally check if everything looks fine. .PP .\" .SH FILES -.br .PP .\" .SH REQUIREMENTS @@ -564,12 +565,16 @@ being written into CIB attributes. The current HANA SR status might differ from srHook attribute after Linux cluster maintenance. 
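+.PP
+One way to spot such a deviation is comparing the srHook attribute with the
+current system replication status, for example as sketched below. SID HA1 is
+used as example:
+.PP
+.RS 2
+# SAPHanaSR-showAttr
+.br
+# su - ha1adm
+.br
+~> cdpy; python3 ./systemReplicationStatus.py; echo RC:$?
+.RE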
.PP
\fB*\fR Manually activating an HANA primary, like start of HANA primary or takeover
-outside the cluster creates risk of a duplicate-primary situation. The user is
-responsible for data integrity, particularly when activating an HANA primary. See
-also susTkOver.py(7).
+outside the Linux cluster creates risk of a duplicate-primary situation. The user
+is responsible for data integrity, particularly when activating an HANA primary.
+See also susTkOver.py(7).
+.PP
+\fB*\fR When manually disabling or unregistering HANA system replication that is
+controlled by the Linux cluster, the SAPHanaController resource needs to be in
+maintenance mode. The user is responsible for data integrity.
.PP
\fB*\fR HANA site names are discovered automatically when the RAs are activated the
-very first time. That exact site names have to be used later for all manual tasks.
+very first time. Those exact site names have to be used later for all manual tasks.
.PP
.\"
.SH BUGS
diff --git a/man/SAPHanaSR_upgrade_to_angi.7 b/man/SAPHanaSR_upgrade_to_angi.7
index 025f6227..91181d2d 100644
--- a/man/SAPHanaSR_upgrade_to_angi.7
+++ b/man/SAPHanaSR_upgrade_to_angi.7
@@ -1,6 +1,6 @@
.\" Version: 1.001
.\"
-.TH SAPHanaSR_upgrade_to_angi 7 "04 Jul 2024" "" "SAPHanaSR"
+.TH SAPHanaSR_upgrade_to_angi 7 "08 Aug 2024" "" "SAPHanaSR"
.\"
.SH NAME
SAPHanaSR_upgrade_to_angi \- How to upgrade from SAPHanaSR or SAPHanaSR-ScaleOut to SAPHanaSR-angi.
@@ -164,6 +164,8 @@ upgrade.
.br
3.5 Add SAPHanaFilesystem resource (optional)
.br
+3.6 Add SAPHanaSR-alert-fencing agent (optional, scale-out)
+.br
4.1 Check for sane state of cluster, HANA and system replication
.br
4.2 Test RA on secondary and trigger susHanaSR.py (optional)
diff --git a/man/SAPHanaTopology.7 b/man/SAPHanaTopology.7
new file mode 100644
index 00000000..9a164f22
--- /dev/null
+++ b/man/SAPHanaTopology.7
@@ -0,0 +1 @@
+.so man7/ocf_suse_SAPHanaTopology.7
diff --git a/man/ocf_suse_SAPHana.7 b/man/ocf_suse_SAPHana.7
index f1af26b1..4eda844f 100644
--- a/man/ocf_suse_SAPHana.7
+++ b/man/ocf_suse_SAPHana.7
@@ -1,6 +1,6 @@
.\" Version: 0.160.1
.\"
-.TH ocf_suse_SAPHana 7 "21 Jun 2024" "" "OCF resource agents"
+.TH ocf_suse_SAPHana 7 "18 Oct 2024" "" "OCF resource agents"
.\"
.SH NAME
SAPHana \- Manages takeover between two SAP HANA databases with system replication.
@@ -119,7 +119,7 @@ Optional, well known directories will be searched by default.
.RS 4
Define timeout how long a call to HANA to receive information can take. This could be e.g. landscapeHostConfiguration.py. There are some specific calls to HANA which have their own timeout values. For example the sr_takeover command does not timeout (inf). If the timeout is reached, the return code will be 124. If you increase the timeouts for HANA calls you should also adjust the operation timeouts of your Linux cluster resources.
.br
-Optional, experimental. Default value: 60.
+Optional. Default value: 60.
.RE
.PP
\fBINSTANCE_PROFILE\fR
diff --git a/man/ocf_suse_SAPHanaController.7 b/man/ocf_suse_SAPHanaController.7
index e1f73007..94966b78 100644
--- a/man/ocf_suse_SAPHanaController.7
+++ b/man/ocf_suse_SAPHanaController.7
@@ -1,6 +1,6 @@
-.\" Version: 1.001
+.\" Version: 1.2.9
.\"
-.TH ocf_suse_SAPHanaController 7 "21 Jun 2024" "" "OCF resource agents"
+.TH ocf_suse_SAPHanaController 7 "18 Oct 2024" "" "OCF resource agents"
.\"
.SH NAME
SAPHanaController \- Manages takeover between two SAP HANA databases with system replication.
@@ -11,7 +11,7 @@ SAPHanaController \- Manages takeover between two SAP HANA databases with system .PP .\" .SH DESCRIPTION - +.PP \fBSAPHanaController\fP is a resource agent (RA) for SAP HANA databases in scale-up and scale-out setups. It manages takeover for a SAP HANA database with system replication in an OCF promotable clone configuration. .PP @@ -87,7 +87,7 @@ Please see also the REQUIREMENTS section below. .PP .\" .SH SUPPORTED PARAMETERS -.br +.PP This resource agent supports the following parameters: .PP \fBSID\fR @@ -148,7 +148,7 @@ Values: [ proceed | fence ]. .br - fence: trigger stop failure and node fencing, if conditions are matched. .br -Experimental (Optional). Default value: proceed. +Optional. Default value: proceed. .RE .PP \fBPREFER_SITE_TAKEOVER\fR @@ -176,7 +176,7 @@ Optional. Default value: false\&. .PP .\" .SH SUPPORTED PROPERTIES -.br +.PP \fBhana_${sid}_glob_filter\fR .RS 4 Global cluster property \fBhana_${sid}_glob_filter\fR . This property defines which messages are logged by the RA. It should only be set if requested by support engineers. The default is sufficient for normal operation. See also SAPHanaSR-showAttr(8). @@ -218,6 +218,7 @@ Optional. .PP .\" .SH SUPPORTED ACTIONS +.PP This resource agent supports the following actions (operations): .PP \fBstart\fR @@ -285,6 +286,7 @@ Changes parameters without forcing a recover of the resource. Suggested minimum .PP .\" .SH RETURN CODES +.PP The return codes are defined by the OCF cluster framework. Please refer to the OCF definition on the website mentioned below. In addition return code 124 will be logged if HANA_CALL_TIMEOUT has been exceeded. @@ -293,6 +295,7 @@ In addition, log entries are written, which can be scanned by using a pattern li .PP .\" .SH EXAMPLES +.PP * Below is an example configuration for a SAPHanaController multi-state resource in an HANA scale-out performance-optimised scenario. .br The HANA consists of two sites with five nodes each. An additional cluster node is used as majority maker for split-brain situations. In addition, a SAPHanaTopology clone resource is needed to make this work. @@ -394,22 +397,22 @@ OCF_RESKEY_CRM_meta_interval=0 .RE .PP * Use of DUPLICATE_PRIMARY_TIMEOUT and Last Primary Timestamp (LPT) in case the primary node has been crashed completely. - +.PP Typically on each side where the RA detects a running primary a time stamp is written to the node's attributes (last primary seen at time: lpt). If the timestamps ("last primary seen at") differ less than the DUPLICATE_PRIMARY_TIMEOUT then the RA could not automatically decide which of the two primaries is the better one. - +.PP .RS 2 1. nodeA is primary and has a current time stamp, nodeB is secondary and has a secondary marker set: .br nodeA: 1479201695 .br nodeB: 30 - +.PP 2. Now nodeA crashes and nodeB takes over: .br (nodeA: 1479201695) .br nodeB: 1479201700 - +.PP 3. A bit later nodeA comes back into the cluster: .br nodeA: 1479201695 @@ -417,7 +420,7 @@ nodeA: 1479201695 nodeB: 1479202000 .br You see while nodeA keeps its primary down the old timestamp is kept. NodeB increases its timestamp on each monitor run. - +.PP 4. After some more time (depending on the parameter DUPLICATE_PRIMARY_TIMEOUT) .br nodeA: 1479201695 @@ -425,13 +428,13 @@ nodeA: 1479201695 nodeB: 1479208895 .br Now the time stamps differ >= DUPLICATE_PRIMARY_TIMEOUT. The algorithm defines nodeA now as "the looser" and depending on the AUTOMATED_REGISTER the nodeA will become the secondary. - +.PP 5. 
NodeA would be registered: .br nodeA: 10 .br nodeB: 1479208900 - +.PP 6. Some time later the secondary gets into sync .br nodeA: 30 @@ -440,34 +443,34 @@ nodeB: 1479209100 .RE .PP * Use of DUPLICATE_PRIMARY_TIMEOUT and Last Primary Timestamp (LPT) in case the the database on primary node has been crashed, but the node is still alive. - +.PP Typically on each side where the RA detects a running primary a time stamp is written to the node's attributes (last primary seen at time: lpt). If the timestamps ("last primary seen at") differ less than the DUPLICATE_PRIMARY_TIMEOUT then the RA could not automatically decide which of the two primaries is the better one. - +.PP .RS 2 1. nodeA is primary and has a current time stamp, nodeB is secondary and has a secondary marker set: .br nodeA: 1479201695 .br nodeB: 30 - +.PP 2. Now HANA on nodeA crashes and nodeB takes over: .br nodeA: 1479201695 .br nodeB: 1479201700 - +.PP 3. As the cluster could be sure to properly stopped the HANA instance at nodeA it *immediately* marks the old primary to be a register candidate, if AUTOMATED_REGISTER is true: .br nodeA: 10 .br nodeB: 1479201760 - +.PP 4. Depending on the AUTOMATED_REGISTER parameter the RA will also immediately regisiter the former primary to become the new secondary: .br nodeA: 10 .br nodeB: 1479201820 - +.PP 5. And after a while the secondary gets in sync .br nodeA: 30 @@ -476,7 +479,7 @@ nodeB: 1479202132 .RE .PP * Set parameter AUTOMATED_REGISTER="true". See SUPPORTED PARAMETERS section above for details. - +.PP .RS 4 # crm_resource -r rsc_SAPHanaCon_HA1_HDB00 -p AUTOMATED_REGISTER -v true .br @@ -508,6 +511,7 @@ default path for DIR_PROFILE .PP .\" .SH REQUIREMENTS +.PP For the current version of the SAPHanaController resource agent that comes with the software package SAPHanaSR-angi, the support is limited to the scenarios and parameters described in the manual pages SAPHanaSR(7) and SAPHanaSR-ScaleOut(7). .PP .\" diff --git a/man/susHanaSR.py.7 b/man/susHanaSR.py.7 index 05afc369..a0cf2b05 100644 --- a/man/susHanaSR.py.7 +++ b/man/susHanaSR.py.7 @@ -1,6 +1,6 @@ .\" Version: 1.001 .\" -.TH susHanaSR.py 7 "01 Mar 2024" "" "SAPHanaSR" +.TH susHanaSR.py 7 "24 Sep 2024" "" "SAPHanaSR" .\" .SH NAME susHanaSR.py \- Provider for SAP HANA srHook method srConnectionChanged(). @@ -69,7 +69,7 @@ Usage, syntax or execution errors. \fB*\fR Example for entry in sudo permissions /etc/sudoers.d/SAPHanaSR .PP .RS 2 -# SAPHanaSR (Scale-Up) needs for srHook +# SAPHanaSR needs for srHook .br ha1adm ALL=(ALL) NOPASSWD: /usr/sbin/crm_attribute -n hana_ha1_site_srHook_* .RE @@ -129,6 +129,33 @@ register_secondaries_on_takeover = true ... .RE .PP +* Example for entry in SAP HANA 2.0 nameserver configuration +/hana/shared/$SID/global/hdb/custom/config/nameserver.ini for two-node scale-out HANA without standby nodes. +.br +This entry is needed at both sites that are managed by the Linux cluster. +The HANA has to be stopped before the files can be edited. Do not copy the file +between nodes. It might be necessary to un-register and re-register the secondary +to make the change effective on the secondary site. +Please check SAP HANA documentation for details. In this example the master node +is suse11, the worker is suse12, the instance number is 00. See also +SAPHanaSR-ScaleOut_basic_cluster(7). +.PP +.RS 2 +[landscape] + ... +.br +master = suse11:31001 +.br +worker = suse11 suse12 +.br +active_master = suse11:31001 +.br +roles_suse11 = worker +.br +roles_suse12 = worker + ... 
+.RE +.PP \fB*\fR Example for checking the system log for srHook setting HANA system replication status in the CIB properties section. .br To be executed on respective HANA primary site's master nameserver. @@ -148,7 +175,7 @@ To be executed on respective HANA primary site's master nameserver. .br ~> grep susHanaSR.srConnectionChanged.*called nameserver_*.trc .br -~> grep crm_attribute.*susHanaSR nameserver_*.trc +~> grep susHanaSR.*crm_attribute nameserver_*.trc .br # exit .RE @@ -265,6 +292,9 @@ the hook provider, delivered with the RPM /hana/shared/$SID/global/hdb/custom/config/global.ini the on-disk representation of HANA global system configuration .TP +/hana/shared/$SID/global/hdb/custom/config/nameserver.ini +the on-disk representation of HANA nameserver configuration +.TP /etc/sudoers , /etc/sudoers.d/* the sudo permissions configuration .TP @@ -276,6 +306,7 @@ the internal cache for srHook status changes while Linux cluster is down, file i .PP .\" .SH REQUIREMENTS +.PP 1. SAP HANA 2.0 SPS05 rev.059.04 or later provides Python 3 as well as the HA/DR provider hook method srConnectionChanged() with multi-target aware parameters. The Python 3 and multi-target aware parameters are needed for the SAPHanaSR-angi @@ -299,12 +330,14 @@ in memory and on disk (in persistence). Linux cluster. .\" .SH BUGS +.PP In case of any problem, please use your favourite SAP support process to open a request for the component BC-OP-LNX-SUSE. Please report any other feedback and suggestions to feedback@suse.com. .PP .\" .SH SEE ALSO +.PP \fBSAPHanaSR-angi\fP(7) , \fBocf_suse_SAPHanaTopology\fP(7) , \fBocf_suse_SAPHanaController\fP(7) , \fBocf_heartbeat_IPaddr2\fP(7) , @@ -320,10 +353,12 @@ https://help.sap.com/docs/SAP_HANA_PLATFORM/6b94445c94ae495c83a19646e7c3fd56/5df .PP .\" .SH AUTHORS +.PP A.Briel, F.Herschel, L.Pinne. .PP .\" .SH COPYRIGHT +.PP (c) 2015-2017 SUSE Linux GmbH, Germany. .br (c) 2018-2024 SUSE LLC diff --git a/man/susHanaSrMultiTarget.py.7 b/man/susHanaSrMultiTarget.py.7 index 0bbbcb02..e2eb38fd 100644 --- a/man/susHanaSrMultiTarget.py.7 +++ b/man/susHanaSrMultiTarget.py.7 @@ -1,6 +1,6 @@ .\" Version: 1.001 .\" -.TH susHanaSrMultiTarget.py 7 "18 Apr 2023" "" "SAPHanaSR-ScaleOut" +.TH susHanaSrMultiTarget.py 7 "09 Jul 2024" "" "SAPHanaSR-ScaleOut" .\" .SH NAME susHanaSrMultiTarget.py \- Provider for multi-target aware SAP HANA srHook @@ -167,8 +167,11 @@ register_secondaries_on_takeover = true /hana/shared/$SID/global/hdb/custom/config/nameserver.ini for two-node scale-out HANA without standby nodes. .br This entry is needed at both sites that are managed by the Linux cluster. -The HANA has to be stopped before the files can be edited. -Do not copy the file between nodes. Please check SAP HANA documentation for details. In this example the master node is suse11, the worker is suse12. +The HANA has to be stopped before the files can be edited. Do not copy the file +between nodes. It might be necessary to un-register and re-register the secondary +to make the change effective on the secondary site. +Please check SAP HANA documentation for details. In this example the master node +is suse11, the worker is suse12. .PP .RS 2 [landscape] @@ -357,7 +360,7 @@ A.Briel, F.Herschel, L.Pinne. .PP .\" .SH COPYRIGHT -(c) 2020-2023 SUSE LLC +(c) 2020-2024 SUSE LLC .br susHanaSrMultiTarget.py comes with ABSOLUTELY NO WARRANTY. 
.br
From 514ed0ebbd85ae03472f4a1ac749e036ba88f47f Mon Sep 17 00:00:00 2001
From: lpinne
Date: Wed, 6 Nov 2024 09:00:10 +0100
Subject: [PATCH 2/2] SAPHanaSR-angi-scenarios.7: wording

---
 man/SAPHanaSR-angi-scenarios.7 | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/man/SAPHanaSR-angi-scenarios.7 b/man/SAPHanaSR-angi-scenarios.7
index e5eea42a..a3cafd57 100644
--- a/man/SAPHanaSR-angi-scenarios.7
+++ b/man/SAPHanaSR-angi-scenarios.7
@@ -1,6 +1,6 @@
.\" Version: 1.2
.\"
-.TH SAPHanaSR-angi-scenarios 7 "28 Oct 2024" "" "SAPHanaSR-angi"
+.TH SAPHanaSR-angi-scenarios 7 "05 Nov 2024" "" "SAPHanaSR-angi"
.\"
.SH NAME
SAPHanaSR-angi-scenarios \- SAP HANA system replication scenarios.
@@ -14,7 +14,7 @@ possible.
.PP
\fB* Scenarios overview\fR
.PP
-The table below shows the known SUSE HA scenarios for HANA system replication
+The table shows the known SAPHanaSR-angi HA scenarios for HANA system replication
for the two HANA topologies. The current support status is also shown.
.PP
.\" see man tbl and https://technicallywewrite.com/2023/09/23/tblexample
@@ -58,7 +58,7 @@ Scale-Out@perf-opt, up to 12 nodes, no standby (BW)@Supported
@two perf-opt clusters connected@Non-supported
.TE
.PP
-.RS 8
+.RS 4
Note: One additional Linux cluster node at 3rd site is needed for all
scale-out scenarios.
.RE
.\" TODO align wording with "Supported HA Solutions"
@@ -68,11 +68,12 @@ setup has been tested and is documented in a setup guide. It is supported by
SUSE.
.PP
\fBUndocumented\fP - the scenario is expected to work for the given topology.
-The setup is currently not documented in a setup guide. SUSE services are prepared to help with implementation. Once the setutp is working, it is supported by
-SUSE.
+The setup is currently not documented in a setup guide. SUSE services are
+prepared to help with implementation. Once the setup is working, it could be
+supported by SUSE.
.PP
\fBNon-supported\fP - the scenario is expected not to work. It is not supported
-by SUSE.
+by SUSE. 
.PP
For details on requirements and configuration of the scenarios, please refer to manual pages SAPHanaSR-angi(7), SAPHanaSR(7) and SAPHanaSR-ScaleOut(7), as well as the respective setup guides.
.PP
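
For illustration only, and not part of the patches above: the LPT decision rule
described in the ocf_suse_SAPHanaController.7 examples can be sketched in plain
shell. The timestamps are taken from the example steps; the value 7200 is an
assumption standing in for the RA's DUPLICATE_PRIMARY_TIMEOUT default, and the
script is a sketch, not code from the SAPHanaController RA.

    #!/bin/sh
    # Sketch: replay the LPT comparison from the man page examples.
    LPT_NODEA=1479201695    # old primary; lpt frozen while its HANA is down
    LPT_NODEB=1479208895    # new primary; lpt increased on each monitor run
    DUPLICATE_PRIMARY_TIMEOUT=7200   # assumed default, in seconds

    if [ $((LPT_NODEB - LPT_NODEA)) -ge "$DUPLICATE_PRIMARY_TIMEOUT" ]; then
        # nodeA is "the loser"; with AUTOMATED_REGISTER=true the RA would
        # register it as the new secondary (lpt becomes 10, later 30).
        echo "nodeA loses: may be registered as new secondary"
    else
        echo "timestamps too close: RA cannot decide automatically"
    fi

With the values above the difference is exactly 7200 seconds, so the first
branch is taken, matching step 4 and step 5 of the crashed-node example.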