From bb153cd502bfb28e61486a8309aa27fb22bb9f36 Mon Sep 17 00:00:00 2001 From: Barry O'Donovan Date: Thu, 23 May 2024 20:32:36 +0100 Subject: [PATCH] [NF] Nagios script to check for any routers stuck in locked state --- .../Controllers/Api/V4/RouterController.php | 50 +++++++++++++++ routes/apiv4-ext-auth-superuser.php | 2 + .../nagios-check-locked-routers.php | 62 +++++++++++++++++++ 3 files changed, 114 insertions(+) create mode 100644 tools/runtime/route-servers/nagios-check-locked-routers.php diff --git a/app/Http/Controllers/Api/V4/RouterController.php b/app/Http/Controllers/Api/V4/RouterController.php index e820582e6..1d14ae560 100644 --- a/app/Http/Controllers/Api/V4/RouterController.php +++ b/app/Http/Controllers/Api/V4/RouterController.php @@ -212,6 +212,56 @@ public function getAllLastUpdatedBefore( int $threshold ): JsonResponse return response()->json( $result ); } + /** + * Find any routers that are stuck in a configuration upgrade / locked for longer than $threshold seconds + * + * Returns the JSON version of the array: + * + * [ + * "handle" => [ + * "last_update_started" => "2024-05-23T19:55:29+01:00", + * "last_update_started_unix" => 1716490529, + * "last_updated" => "2024-05-23T19:55:28+01:00", + * "last_updated_unix" => 1716490528 + * ], + * ... 
+ * ] + * + * @param int $threshold Maximum time, in seconds, that a router may be mid-update before it is reported + * + * @return JsonResponse Routers considered stuck, keyed by router handle + */ + public function getAllLockedLongerThan( int $threshold ): JsonResponse + { + $result = []; + foreach( Router::all() as $r ) { + + if( $r->pause_updates ) { + continue; // skip paused routers + } + + if( !$r->last_update_started && !$r->last_updated ) { + continue; // never updated / never used + } + + if( $r->last_update_started && $r->last_updated && $r->last_updated->gte( $r->last_update_started ) ) { + continue; // most recent update completed at or after it started - not locked + } + + if( !$r->last_updated && $r->last_update_started->diffInSeconds( Carbon::now() ) > $threshold ) { + $result[ $r->handle ] = $this->lastUpdatedArray( $r ); // started but has never completed an update + continue; + } + + if( $r->last_updated && $r->last_update_started && $r->last_update_started->diffInSeconds( Carbon::now() ) > $threshold ) { // lock age runs from the start of the outstanding update until now + $result[ $r->handle ] = $this->lastUpdatedArray( $r ); + } + + } + + return response()->json( $result ); + } + /** * Format the router's last updated datetime as an array * diff --git a/routes/apiv4-ext-auth-superuser.php b/routes/apiv4-ext-auth-superuser.php index 91ff68ffd..734fc5598 100644 --- a/routes/apiv4-ext-auth-superuser.php +++ b/routes/apiv4-ext-auth-superuser.php @@ -93,6 +93,8 @@ Route::get('updated', 'RouterController@getAllLastUpdated' ); Route::get('updated-before/{threshold}', 'RouterController@getAllLastUpdatedBefore' ); + + Route::get('locked-longer-than/{threshold}', 'RouterController@getAllLockedLongerThan' ); }); /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/tools/runtime/route-servers/nagios-check-locked-routers.php b/tools/runtime/route-servers/nagios-check-locked-routers.php new file mode 100644 index 000000000..461d68058 --- /dev/null +++ b/tools/runtime/route-servers/nagios-check-locked-routers.php @@ -0,0 +1,62 @@ +$threshold seconds ago +$s = curl_init(); +curl_setopt( $s, CURLOPT_URL, $url . '/' . $threshold ); +curl_setopt( $s, CURLOPT_HTTPHEADER, [ 'X-IXP-Manager-API-Key: ' . 
$key ] ); +curl_setopt( $s, CURLOPT_RETURNTRANSFER, true ); +$json = curl_exec($s); // false on transport failure; the HTTP code reported below is then 0 + +if( curl_getinfo($s,CURLINFO_HTTP_CODE) !== 200 ) { // anything other than HTTP 200 (including no response at all) is UNKNOWN + echo "UNKNOWN: non-200 status code returned by API: " . curl_getinfo($s,CURLINFO_HTTP_CODE) . "\n"; + exit( 3 ); +} + +if( $json === "[]" ) { // an empty JSON array means the API found no stuck routers + echo sprintf( "OK: no routers stuck mid-configuration for >%d seconds\n", $threshold ); + exit(0); +} + +if( !( $routers = json_decode( $json ) ) ) { + echo "UNKNOWN: could not decode JSON response from API\n"; + exit( 3 ); +} + +$bad = []; +foreach( $routers as $handle => $r ) { // response is keyed by router handle; only the handles are reported + $bad[] = $handle; +} + +echo 'ERROR: the following router(s) have been locked for more than ' . $threshold . ' secs: ' . implode( ', ', $bad ) . ".\n"; +exit(2);