Merge pull request #458 from RUGSoftEng/backend/uptime_checking
Backend/uptime checking
TheVeggydude authored Jun 25, 2018
2 parents d982196 + 3d45f4c commit a665da0
Showing 13 changed files with 367 additions and 19 deletions.
8 changes: 7 additions & 1 deletion pydash/pydash_app/__init__.py
@@ -10,6 +10,7 @@

import pydash_app.dashboard.services.fetching
import pydash_app.dashboard.services.seeding
import pydash_app.dashboard.services.pinging
import pydash_app.dashboard


@@ -26,10 +27,15 @@ def stop_task_scheduler():
def schedule_periodic_tasks():
"""Schedules all periodic tasks using the default task scheduler, which is declared in pydash.periodic_tasks."""
import datetime # <- remove this line when custom interval no longer necessary for testing.
dashboard.services.fetching.schedule_all_periodic_dashboards_tasks(

pydash_app.dashboard.services.fetching.schedule_all_periodic_dashboards_tasks(
interval=datetime.timedelta(minutes=1)
)

# pydash_app.dashboard.services.pinging.schedule_all_periodic_dashboard_pinging(
# interval=datetime.timedelta(seconds=15)
# )

pydash_app.user.services.pruning.schedule_periodic_pruning_task()


129 changes: 129 additions & 0 deletions pydash/pydash_app/dashboard/downtime.py
@@ -0,0 +1,129 @@
"""
Exposes the class DowntimeLog, which keeps track of a web service's downtime,
and calculates downtime intervals, total downtime, and downtime percentage
in an on-line manner.
"""

import persistent
import datetime
from collections import defaultdict


class DowntimeLog(persistent.Persistent):
"""
Keeps track of downtime, and calculates downtime intervals, total downtime, and downtime percentage
in an on-line manner.
"""

def __init__(self):
self._downtime_intervals = defaultdict(list) # datetime.date -> list[(datetime.time, datetime.time)]
self._total_downtime = defaultdict(datetime.timedelta) # datetime.date -> datetime.timedelta

self._downtime_start = None

def add_ping_result(self, is_up, ping_datetime=datetime.datetime.now(tz=datetime.timezone.utc)):
"""
Add the result of a ping request to the downtime log.
:param is_up: Whether the web service is up or not.
:param ping_datetime: When the ping took place (approximately); defaults to the current time in UTC.
"""
if is_up:
if self._downtime_start:
# Split the downtime into intervals of at most 24 hours

start = self._downtime_start
end = min(ping_datetime, _day_end(start))

while start <= ping_datetime:
date = start.date()
interval = (start.timetz(), end.timetz())
self._downtime_intervals[date].append(interval)
self._total_downtime[date] += (end - start) + datetime.timedelta(microseconds=1)

start = _day_start(start + datetime.timedelta(days=1))
end = min(ping_datetime, _day_end(start))

self._downtime_start = None
else:
if self._downtime_start is None:
self._downtime_start = ping_datetime

def get_downtime_intervals(
self,
start=datetime.datetime.now(tz=datetime.timezone.utc).date() - datetime.timedelta(days=90),
end=datetime.datetime.now(tz=datetime.timezone.utc).date()):
"""
Return the intervals of downtime per day between two dates.
:param start: The start date (exclusive; defaults to 90 days before the current day).
:param end: The end date (inclusive; defaults to the current day).
:return: A dict containing a list of downtime intervals per day.
"""
if end <= start:
raise ValueError('Date range cannot be negative or zero')

return {
date.strftime('%Y-%m-%d'): list(self._downtime_intervals[date])
for date in _date_range(start, end)
}

def get_total_downtimes(
self,
start=datetime.datetime.now(tz=datetime.timezone.utc).date() - datetime.timedelta(days=90),
end=datetime.datetime.now(tz=datetime.timezone.utc).date()):
"""
Return the total amounts of downtime per day between two dates.
:param start: The start date (exclusive; defaults to 90 days before the current day).
:param end: The end date (inclusive; defaults to the current day).
:return: A dict containing the total downtime per day.
"""
if end <= start:
raise ValueError('Date range cannot be negative or zero')

return {
date.strftime('%Y-%m-%d'): self._total_downtime[date]
for date in _date_range(start, end)
}

def get_downtime_percentage(
self,
start=datetime.datetime.now(tz=datetime.timezone.utc).date() - datetime.timedelta(days=90),
end=datetime.datetime.now(tz=datetime.timezone.utc).date()):
"""
Get the percentage of downtime between two dates.
:param start: The start date (exclusive; defaults to 90 days before the current day).
:param end: The end date (inclusive; defaults to the current day).
:return: A float, the downtime percentage for the given date range.
"""
if end <= start:
raise ValueError('Date range cannot be negative or zero')

total_downtime = sum(
(self._total_downtime[date] for date in _date_range(start, end)),
datetime.timedelta(0)
)

total_time = end - start

percentage = total_downtime/total_time*100

return percentage


def _day_start(dt):
return datetime.datetime.combine(dt.date(), datetime.time.min).replace(tzinfo=datetime.timezone.utc)


def _day_end(dt):
return datetime.datetime.combine(dt.date(), datetime.time.max).replace(tzinfo=datetime.timezone.utc)


def _date_range(start, end):
"""
Yield dates in the range (start, end].
:param start: Start date (exclusive).
:param end: End date.
"""
start += datetime.timedelta(days=1)
while start <= end:
yield start
start += datetime.timedelta(days=1)
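
A minimal usage sketch of DowntimeLog (not part of this commit), assuming the module is importable as pydash_app.dashboard.downtime and that both pings fall on the same UTC day:

import datetime
from pydash_app.dashboard.downtime import DowntimeLog

log = DowntimeLog()
now = datetime.datetime.now(tz=datetime.timezone.utc)

# A failed ping opens a downtime period; the next successful ping closes it
# and books the interval under the day(s) it covers.
log.add_ping_result(False, now - datetime.timedelta(minutes=10))
log.add_ping_result(True, now)

today = now.date()
yesterday = today - datetime.timedelta(days=1)
print(log.get_total_downtimes(start=yesterday, end=today))      # roughly 10 minutes booked on today
print(log.get_downtime_percentage(start=yesterday, end=today))  # roughly 0.7 (percent of one day)

Because intervals are split at day boundaries, an outage that spans midnight is booked partly on each day, which keeps the per-day dictionaries consistent.
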
44 changes: 41 additions & 3 deletions pydash/pydash_app/dashboard/entity.py
@@ -36,12 +36,14 @@
import uuid
import persistent
from enum import Enum
from datetime import datetime, timedelta, timezone

from pydash_app.dashboard.downtime import DowntimeLog

from pydash_app.dashboard.endpoint import Endpoint
from ..dashboard.aggregator import Aggregator
from pydash_app.dashboard.aggregator.aggregator_group import AggregatorGroup, truncate_datetime_by_granularity


class DashboardState(Enum):
"""
The DashboardState enum indicates the state in which a Dashboard can remain, regarding remote fetching:
@@ -85,13 +87,16 @@ class Dashboard(persistent.Persistent):
This task is handled by the `dashboard_repository`.
"""

def __init__(self, url, token, user_id, name=None):
def __init__(self, url, token, user_id, name=None, monitor_downtime=False):
if not isinstance(url, str) or not isinstance(token, str):
raise TypeError("Dashboard expects both url and token to be strings.")

if name is not None and not isinstance(name, str):
raise TypeError("Dashboard expects name to be a string.")

if not isinstance(monitor_downtime, bool):
raise TypeError("Dashboard expects monitor_downtime to be a string.")

# Make sure integers and strings are allowed as well.
if not isinstance(user_id, uuid.UUID):
user_id = uuid.UUID(user_id)
@@ -111,6 +116,9 @@ def __init__(self, url, token, user_id, name=None):
self._endpoint_calls = [] # list of unfiltered endpoint calls, for use with an aggregator.
self._aggregator_group = AggregatorGroup()

self.monitor_downtime = monitor_downtime
self._downtime_log = DowntimeLog()

def __repr__(self):
return f'<{self.__class__.__name__} id={self.id} url={self.url}>'

@@ -262,7 +270,7 @@ def statistic(self, statistic, filters={}):
:raises KeyError: This happens when the statistic is not supported by the dashboard.
"""
return self._aggregator_group.fetch_aggregator(filters).as_dict()[statistic]

def statistic_per_timeslice(self, statistic, timeslice, timeslice_is_static, start_datetime, end_datetime, filters={}):
"""
Slices up the specified datetime range (=[start_datetime, end_datetime)) into slices of the size of `timeslice`.
@@ -334,3 +342,33 @@ def statistic_per_timeslice(self, statistic, timeslice, timeslice_is_static, sta
return_dict[datetime] = aggregator.as_dict()[statistic]

return return_dict

def add_ping_result(self, is_up, ping_datetime=datetime.now(tz=timezone.utc)):
"""
Adds the result of a ping request to the dashboard.
:param is_up: Whether the dashboard's web service is up.
:param ping_datetime: When the ping took place (approximately); defaults to the current time in UTC.
"""
self._downtime_log.add_ping_result(is_up, ping_datetime)

def get_downtime_data(
self,
start=datetime.now(tz=timezone.utc).date() - timedelta(days=90),
end=datetime.now(tz=timezone.utc).date()):
"""
Returns a dict containing this dashboard's downtime data for a given date range.
:param start: The start date (exclusive; defaults to 90 days before the current date).
:param end: The end date (inclusive; defaults to the current date).
:return: A dictionary containing the dashboard's downtime data in the given date range.
"""
return {
'downtime_intervals': self._downtime_log.get_downtime_intervals(start, end),
'total_downtimes': self._downtime_log.get_total_downtimes(start, end),
'downtime_percentage': self._downtime_log.get_downtime_percentage(start, end)
}

# Required because `multi_indexed_collection` puts dashboards in a set,
# that needs to order its keys for fast lookup.
# Because the IDs are unchanging integer values, use that.
def __lt__(self, other):
return self.id < other.id
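
A sketch of the extended Dashboard API (not part of the diff); the URL, token and name below are made-up placeholders:

import uuid
from pydash_app.dashboard.entity import Dashboard

dashboard = Dashboard(
    url='http://example.com/dashboard',        # placeholder
    token='0123456789abcdef0123456789abcdef',  # placeholder
    user_id=str(uuid.uuid4()),
    name='Example Dashboard',
    monitor_downtime=True,  # opt in to the new uptime monitoring
)

dashboard.add_ping_result(False)  # the service looked down on this ping
dashboard.add_ping_result(True)   # ...and is back up on the next one

data = dashboard.get_downtime_data()
print(data['downtime_percentage'])
print(data['total_downtimes'])
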
19 changes: 10 additions & 9 deletions pydash/pydash_app/dashboard/services/fetching.py
@@ -40,17 +40,17 @@ def schedule_all_periodic_dashboards_tasks(


def schedule_periodic_dashboard_fetching(
dashboard,
dashboard_id,
interval=timedelta(hours=1),
scheduler=periodic_tasks.default_task_scheduler):
"""
Schedules the periodic EndpointCall fetching task for this dashboard.
"""
logger.info(f'Creating periodic fetching task for {dashboard}')
logger.info(f'Creating periodic fetching task for {dashboard_id}')

periodic_tasks.add_periodic_task(
name=("dashboard", dashboard.id, "fetching"),
task=partial(fetch_and_update_new_dashboard_info, dashboard.id),
name=("dashboard", dashboard_id, "fetching"),
task=partial(fetch_and_update_new_dashboard_info, dashboard_id),
interval=interval,
scheduler=scheduler)

@@ -62,16 +62,17 @@ def schedule_historic_dashboard_fetching(
The periodic fetching of new EndpointCall information is scheduled as soon as this task completes.
"""

def task(dashboard_id):
fetch_and_update_historic_dashboard_info(dashboard_id)
schedule_periodic_dashboard_fetching(dashboard_id)

periodic_tasks.add_background_task(
name=("dashboard", dashboard.id, "historic_fetching"),
task=partial(task, dashboard.id),
task=partial(_task_historic_dashboard_fetching, dashboard.id),
scheduler=scheduler)


def _task_historic_dashboard_fetching(dashboard_id):
fetch_and_update_historic_dashboard_info(dashboard_id)
schedule_periodic_dashboard_fetching(dashboard_id)


def fetch_and_update_new_dashboard_info(dashboard_id):
"""
Updates the dashboard with the new EndpointCall information that is fetched from the Dashboard's remote location.
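
schedule_periodic_dashboard_fetching now takes a dashboard id instead of the dashboard object itself; a call sketch (not part of the diff) with a made-up id:

from datetime import timedelta

from pydash_app.dashboard.services.fetching import schedule_periodic_dashboard_fetching

schedule_periodic_dashboard_fetching(
    '00000000-0000-0000-0000-000000000000',  # hypothetical dashboard id
    interval=timedelta(minutes=1),
)
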
78 changes: 78 additions & 0 deletions pydash/pydash_app/dashboard/services/pinging.py
@@ -0,0 +1,78 @@
"""
Periodically pings a dashboard to see if the web service is still up.
"""

from functools import partial
from datetime import timedelta
import json

import requests

import flask_monitoring_dashboard_client
import pydash_app.dashboard.repository as dashboard_repository
import pydash_logger
import periodic_tasks

logger = pydash_logger.Logger(__name__)

_DEFAULT_PING_INTERVAL = timedelta(minutes=5)


def schedule_all_periodic_dashboard_pinging(
interval=_DEFAULT_PING_INTERVAL,
scheduler=periodic_tasks.default_task_scheduler):
"""
Set up periodic dashboard pinging tasks for all dashboards that want their uptime to be monitored.
:param interval: The frequency with which to ping a dashboard, defaults to 5 minutes.
:param scheduler: The task scheduler to schedule the tasks to, defaults to the default scheduler.
"""
for dashboard in dashboard_repository.all():
schedule_periodic_dashboard_pinging(dashboard, interval, scheduler)


def schedule_periodic_dashboard_pinging(
dashboard,
interval=_DEFAULT_PING_INTERVAL,
scheduler=periodic_tasks.default_task_scheduler):
"""
Set up a periodic pinging task for a dashboard if the dashboard allows it.
:param dashboard: The dashboard to set up a pinging task for.
:param interval: The frequency with which to ping a dashboard, defaults to 5 minutes.
:param scheduler: The task scheduler to schedule this task to, defaults to the default scheduler.
"""

if dashboard.monitor_downtime:
periodic_tasks.add_periodic_task(
name=('dashboard', dashboard.id, 'pinging'),
task=partial(_ping_dashboard, dashboard.id),
interval=interval,
scheduler=scheduler)


def _ping_dashboard(dashboard_id):
try:
dashboard = dashboard_repository.find(dashboard_id)
except KeyError:
logger.warning('Dashboard does not exist')
return

is_up = _is_dashboard_up(dashboard.url)
dashboard.add_ping_result(is_up)

dashboard_repository.update(dashboard)


def _is_dashboard_up(url):
"""
Connect to a dashboard to see if it's up.
:param url: The dashboard's URL.
:return: True or False depending on whether the dashboard is up.
"""
try:
flask_monitoring_dashboard_client.get_details(url)
except requests.exceptions.RequestException:
return False
except (json.JSONDecodeError, Exception):
return True

return True
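
How the new pinging service would be switched on (a sketch, not part of the diff), mirroring the call that is still commented out in pydash_app/__init__.py above:

from datetime import timedelta

import pydash_app.dashboard.services.pinging as pinging

# Schedules a pinging task for every dashboard whose monitor_downtime flag is set.
pinging.schedule_all_periodic_dashboard_pinging(interval=timedelta(seconds=15))
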
3 changes: 2 additions & 1 deletion pydash/pydash_app/dashboard/services/seeding.py
@@ -25,7 +25,8 @@ def seed():
dashboard_new = Dashboard('http://flask-sample.koenbolhuis.nl/dashboard',
'cc83733cb0af8b884ff6577086b87909',
user.get_id(),
'Testing Dashboard (FMD v1.12.0)')
'Testing Dashboard (FMD v1.12.0+)',
True)
dashboard_old = Dashboard('http://flask-sample-old.koenbolhuis.nl/dashboard',
'cc83733cb0af8b884ff6577086b87909',
user.get_id(),
2 changes: 1 addition & 1 deletion pydash/pydash_app/user/services/pruning.py
@@ -20,7 +20,7 @@ def schedule_periodic_pruning_task(
:param scheduler: The TaskScheduler instance that should schedule this user pruning task and execute it.
Defaults to the default task scheduler of pydash.periodic_tasks.
"""
scheduler.add_periodic_task(
periodic_tasks.add_periodic_task(
name=('users', 'pruning'),
task=_prune_unverified_users,
interval=interval,