Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add connection health check #152

Merged
merged 11 commits into from
Jul 4, 2024
63 changes: 61 additions & 2 deletions src/howitz/endpoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
from zinolib.controllers.zino1 import Zino1EventManager, RetryError, EventClosedError, UpdateHandler, LostConnectionError
from zinolib.event_types import Event, AdmState, PortState, BFDState, ReachabilityState, LogEntry, HistoryEntry
from zinolib.compat import StrEnum
from zinolib.ritz import AuthenticationError
from zinolib.ritz import AuthenticationError, ProtocolError

from howitz.users.utils import authenticate_user

Expand Down Expand Up @@ -149,6 +149,41 @@ def connect_to_zino(username, token):
connect_to_updatehandler()


def reconnect_to_zino():
    """Re-establish the Zino connection and resynchronise event data.

    Disconnects the current event manager, connects again with the
    credentials of the current user, reloads the events, and stores the
    refreshed events in the cache and the refreshed event ids in the session.
    The session's event table is flagged as outdated so the UI knows it has
    to be refreshed.
    """
    current_app.logger.info('Attempting reconnect to Zino')

    # Start from a clean slate: drop any connection state that may be dangling
    current_app.event_manager.disconnect()

    # Connect again using the credentials the current user already has
    connect_to_zino(current_user.username, current_user.token)

    # NOTE(review): the manager is looked up only after connect_to_zino() in
    # case that call swaps out current_app.event_manager - confirm.
    manager = current_app.event_manager

    # Populate the manager so that its event data is available after re-connect
    manager.get_events()
    fresh_events = manager.events

    # Propagate the re-fetched events, so that NTIE updates that happened
    # while the connection was down are not lost until a manual page refresh
    current_app.cache.set("events", fresh_events)
    session["event_ids"] = list(fresh_events.keys())
    session["events_last_refreshed"] = None  # Event table in the UI is now outdated
    session.modified = True


def test_zino_connection():
    """Probe the Zino connection and report its state.

    Returns:
        True when the probe succeeds or raises ProtocolError, False when the
        probe times out, and None when any other exception is raised (the
        connection state is then uncertain and the caller decides how to
        interpret it).
    """
    try:
        # According to the original author's note this fetches an event
        # using a fake id
        current_app.event_manager.test_connection()
    except ProtocolError:
        # The event id is unknown to the server, but the connection is OK
        connection_state = True
    except TimeoutError as timeout_err:
        current_app.logger.exception('TimeoutError when testing Zino connection %s', timeout_err)
        connection_state = False
    except Exception as unexpected_err:
        current_app.logger.exception('Unexpected error when testing Zino connection %s', unexpected_err)
        connection_state = None
    else:
        connection_state = True
    return connection_state


def clear_ui_state():
with current_app.app_context():
session["selected_events"] = {}
Expand Down Expand Up @@ -502,7 +537,7 @@ def auth():
def get_events():
table_events = get_current_events()

return render_template('components/table/events-table-body.html', event_list=table_events, refresh_interval=current_app.howitz_config["refresh_interval"])
return render_template('responses/get-events-table.html', event_list=table_events, refresh_interval=current_app.howitz_config["refresh_interval"])


@main.route('/refresh_events')
Expand All @@ -518,6 +553,30 @@ def refresh_events():
removed_event_list=removed_events, added_event_list=added_events)


@main.route('/test_connection')
def test_conn():
    """Report the state of the Zino connection to the connection status bar.

    The outcome of test_zino_connection() selects the response:

    * False: the error app bar content is rendered.
    * None (uncertain state): a quiet, best-effort reconnect is attempted and
      an empty response with ``HX-Reswap: none`` is returned.
    * True: the success app bar content is rendered. When the request's
      ``HX-Target`` header is ``connection-error-content`` (i.e. the error
      bar was being shown), a reconnect is performed first.
    """
    is_connection_ok = test_zino_connection()
    caller_id = request.headers.get('HX-Target', None)
    if is_connection_ok is False:
        current_app.logger.debug('Connection test failed showing error appbar')
        return render_template('components/feedback/connection-status-bar/error-appbar-content.html',
                               error_message="Connection to Zino server is lost")

    if is_connection_ok is None:  # Uncertain connection state
        try:  # Attempt a quiet reconnect
            reconnect_to_zino()
        except Exception:
            # Still best-effort: the failure must not break the health check,
            # but it is logged instead of vanishing silently. Unlike a bare
            # except, SystemExit and KeyboardInterrupt are not swallowed.
            current_app.logger.exception('Quiet reconnect to Zino failed')
        response = make_response()
        response.headers['HX-Reswap'] = 'none'
        return response

    if caller_id == 'connection-error-content':  # If connection should be restored after error
        reconnect_to_zino()
    current_app.logger.debug('Connection test OK, caller ID %s', caller_id)
    return render_template('components/feedback/connection-status-bar/success-appbar-content.html')


@main.route('/events/<event_id>/expand_row', methods=["GET"])
def expand_event_row(event_id):
event_id = int(event_id)
Expand Down
29 changes: 10 additions & 19 deletions src/howitz/error_handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from flask_login import current_user
from werkzeug.exceptions import HTTPException, BadGateway

from howitz.endpoints import connect_to_zino
from howitz.endpoints import reconnect_to_zino, test_zino_connection
from howitz.utils import serialize_exception


Expand Down Expand Up @@ -100,38 +100,29 @@ def handle_lost_connection(e):
current_app.logger.error("Lost connection to Zino server: %s", e.args[0])

if current_user.is_authenticated: # Re-connect to Zino with existing credentials and inform user that there was an error via alert pop-up
if current_app.event_manager.is_connected:
current_app.event_manager.disconnect()

connect_to_zino(current_user.username, current_user.token)

# Make sure that EventManager is populated with data after re-connect
current_app.event_manager.get_events()

# Re-fetch the event list and update event data in cache and session
# This is needed in case there were NTIE updates that occurred while connection was down, so that they are not lost until manual page refresh
events = current_app.event_manager.events
current_app.cache.set("events", events) # Update cache
session["event_ids"] = list(events.keys()) # Update session
session["events_last_refreshed"] = None # Mark current event table in UI as outdated
session.modified = True

alert_random_id = str(uuid.uuid4())
try:
short_err_msg = e.args[0]
except IndexError:
short_err_msg = 'Temporarily lost connection to Zino server'
short_err_msg = 'Lost connection to Zino server'

if not "errors" in session:
session["errors"] = dict()
session["errors"][str(alert_random_id)] = serialize_exception(e)
session.modified = True

# Check if connection is still down
should_attempt_reconnect = test_zino_connection()
if should_attempt_reconnect is not False: # Both True or None options are acceptable
reconnect_to_zino() # Ensure a clean reconnect to Zino server and re-populate the events data
short_err_msg = 'Temporarily lost connection to Zino server, please retry your action'

response = make_response(render_template('/components/popups/alerts/error/error-alert.html',
alert_id=alert_random_id, short_err_msg=short_err_msg))
response.headers['HX-Reswap'] = 'beforeend'
return response, 503
else: # Redirect to /login for complete re-authentication
current_app.event_manager.disconnect()
res = make_response()
res.headers['HX-Redirect'] = '/login'
return res
return res, 401
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
{#
Connection status bar in its "Connected to Zino / Fetching events" state.
The outer element carries the id "connection-state-appbar"; the two blurred
gradient shapes are decorative only and are hidden from assistive technology
via aria-hidden.
NOTE(review): presumably this is the initial placeholder that is later
replaced by the polling wrapper using the same id - confirm.
#}
<div
id="connection-state-appbar"
>
<div id="connection-status-bar"
class="relative isolate flex justify-center gap-x-6 overflow-hidden bg-teal-700 px-6 py-2.5"
>
<div class="absolute left-[max(-7rem,calc(50%-52rem))] top-1/2 -z-10 -translate-y-1/2 transform-gpu blur-2xl"
aria-hidden="true">
<div class="aspect-[577/310] w-[36.0625rem] bg-gradient-to-r from-teal-200 to-teal-900 opacity-30"
style="clip-path: polygon(74.8% 41.9%, 97.2% 73.2%, 100% 34.9%, 92.5% 0.4%, 87.5% 0%, 75% 28.6%, 58.5% 54.6%, 50.1% 56.8%, 46.9% 44%, 48.3% 17.4%, 24.7% 53.9%, 0% 27.9%, 11.9% 74.2%, 24.9% 54.1%, 68.6% 100%, 74.8% 41.9%)"></div>
</div>
<div class="absolute left-[max(45rem,calc(50%+8rem))] top-1/2 -z-10 -translate-y-1/2 transform-gpu blur-2xl"
aria-hidden="true">
<div class="aspect-[577/310] w-[36.0625rem] bg-gradient-to-r from-teal-200 to-teal-900 opacity-30"
style="clip-path: polygon(74.8% 41.9%, 97.2% 73.2%, 100% 34.9%, 92.5% 0.4%, 87.5% 0%, 75% 28.6%, 58.5% 54.6%, 50.1% 56.8%, 46.9% 44%, 48.3% 17.4%, 24.7% 53.9%, 0% 27.9%, 11.9% 74.2%, 24.9% 54.1%, 68.6% 100%, 74.8% 41.9%)"></div>
</div>
<div class="flex flex-wrap items-center gap-x-4 gap-y-2">
<p class="text-sm leading-6 text-gray-950">
<strong class="font-semibold">Connected to Zino</strong>
<svg viewBox="0 0 2 2" class="mx-2 inline h-0.5 w-0.5 fill-current" aria-hidden="true">
<circle cx="1" cy="1" r="1"/>
</svg>
Fetching events
</p>
</div>
</div>

</div>
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
{#
Polling wrapper for the connection status bar.
- hx-swap-oob="true": when this fragment is part of a response, htmx swaps it
  out-of-band into the element with the id "connection-state-appbar".
- hx-get / hx-trigger: issues GET /test_connection every 60 seconds.
- hx-target="find div" with hx-swap="outerHTML": the response replaces the
  first descendant div, i.e. the status content included below.
The bar starts out with the success content.
#}
<div
id="connection-state-appbar"
hx-swap-oob="true"
hx-get="/test_connection"
hx-trigger="every 60s"
hx-swap="outerHTML"
hx-target="find div"
>
{% include "components/feedback/connection-status-bar/success-appbar-content.html" %}
</div>
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
{# Implementation inspired by https://tailwindui.com/components/marketing/elements/banners#component-8904b9d9a9fbb9a2313df3975112f9d7 #}
<div
id="connection-error-content"
role="alert"
class="relative isolate flex justify-center gap-x-6 overflow-hidden bg-rose-700 px-6 py-2.5">
<div class="absolute left-[max(-7rem,calc(50%-52rem))] top-1/2 -z-10 -translate-y-1/2 transform-gpu blur-2xl"
aria-hidden="true">
<div class="aspect-[577/310] w-[36.0625rem] bg-gradient-to-r from-rose-200 to-rose-900 opacity-30"
style="clip-path: polygon(74.8% 41.9%, 97.2% 73.2%, 100% 34.9%, 92.5% 0.4%, 87.5% 0%, 75% 28.6%, 58.5% 54.6%, 50.1% 56.8%, 46.9% 44%, 48.3% 17.4%, 24.7% 53.9%, 0% 27.9%, 11.9% 74.2%, 24.9% 54.1%, 68.6% 100%, 74.8% 41.9%)"></div>
</div>
<div class="absolute left-[max(45rem,calc(50%+8rem))] top-1/2 -z-10 -translate-y-1/2 transform-gpu blur-2xl"
aria-hidden="true">
<div class="aspect-[577/310] w-[36.0625rem] bg-gradient-to-r from-rose-200 to-rose-900 opacity-30"
style="clip-path: polygon(74.8% 41.9%, 97.2% 73.2%, 100% 34.9%, 92.5% 0.4%, 87.5% 0%, 75% 28.6%, 58.5% 54.6%, 50.1% 56.8%, 46.9% 44%, 48.3% 17.4%, 24.7% 53.9%, 0% 27.9%, 11.9% 74.2%, 24.9% 54.1%, 68.6% 100%, 74.8% 41.9%)"></div>
</div>
<div class="flex flex-wrap items-center gap-x-4 gap-y-2">
<p class="text-sm leading-6 text-gray-950">
<strong class="font-semibold">Connection timed out</strong>
<svg viewBox="0 0 2 2" class="mx-2 inline h-0.5 w-0.5 fill-current" aria-hidden="true">
<circle cx="1" cy="1" r="1"/>
</svg>
{{ error_message }}
</p>
</div>
</div>
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
{# Implementation inspired by https://tailwindui.com/components/marketing/elements/banners#component-8904b9d9a9fbb9a2313df3975112f9d7 #}
<div
id="connection-success-content"
role="status"
class="relative isolate flex justify-center gap-x-6 bg-teal-900 overflow-hidden px-6 py-0.5">
<div class="absolute left-[max(-7rem,calc(50%-52rem))] top-1/2 -z-10 -translate-y-1/2 transform-gpu blur-2xl"
aria-hidden="true">
<div class="aspect-[577/310] w-[36.0625rem] bg-gradient-to-r from-teal-200 to-teal-950 opacity-30"
style="clip-path: polygon(74.8% 41.9%, 97.2% 73.2%, 100% 34.9%, 92.5% 0.4%, 87.5% 0%, 75% 28.6%, 58.5% 54.6%, 50.1% 56.8%, 46.9% 44%, 48.3% 17.4%, 24.7% 53.9%, 0% 27.9%, 11.9% 74.2%, 24.9% 54.1%, 68.6% 100%, 74.8% 41.9%)"></div>
</div>
<div class="absolute left-[max(45rem,calc(50%+8rem))] top-1/2 -z-10 -translate-y-1/2 transform-gpu blur-2xl"
aria-hidden="true">
<div class="aspect-[577/310] w-[36.0625rem] bg-gradient-to-r from-teal-200 to-teal-950 opacity-30"
style="clip-path: polygon(74.8% 41.9%, 97.2% 73.2%, 100% 34.9%, 92.5% 0.4%, 87.5% 0%, 75% 28.6%, 58.5% 54.6%, 50.1% 56.8%, 46.9% 44%, 48.3% 17.4%, 24.7% 53.9%, 0% 27.9%, 11.9% 74.2%, 24.9% 54.1%, 68.6% 100%, 74.8% 41.9%)"></div>
</div>
</div>
3 changes: 3 additions & 0 deletions src/howitz/templates/responses/get-events-table.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{% include "components/table/events-table-body.html" %}

{% include "components/feedback/connection-status-bar/connection-appbar.html" %}
1 change: 1 addition & 0 deletions src/howitz/templates/views/events.html
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
{% include "/components/navbar/navbar.html"%}

<h1 class="sr-only">Events table</h1>
{% include "components/feedback/connection-status-bar/appbar-placeholder.html" %}
<main class="relative shadow-md sm:rounded-lg">

{% include "/components/toolbar/table-toolbar.html"%}
Expand Down