diff --git a/ConfigExamples/azureLogAnalytics.yaml b/ConfigExamples/azureLogAnalytics.yaml new file mode 100644 index 0000000..7693cff --- /dev/null +++ b/ConfigExamples/azureLogAnalytics.yaml @@ -0,0 +1,14 @@ +collect: # Settings determining which audit logs to collect and how to do it + contentTypes: + Audit.General: True + Audit.AzureActiveDirectory: True + Audit.Exchange: True + Audit.SharePoint: True + DLP.All: True + skipKnownLogs: True + resume: True +output: + azureLogAnalytics: + enabled: True + workspaceId: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + sharedKey: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx \ No newline at end of file diff --git a/ConfigExamples/fileOutput.yaml b/ConfigExamples/fileOutput.yaml new file mode 100644 index 0000000..c8ec0b2 --- /dev/null +++ b/ConfigExamples/fileOutput.yaml @@ -0,0 +1,13 @@ +collect: + contentTypes: + Audit.General: True + Audit.AzureActiveDirectory: True + Audit.Exchange: True + Audit.SharePoint: True + DLP.All: True + skipKnownLogs: True + resume: True +output: + file: + enabled: True + path: 'output.txt' \ No newline at end of file diff --git a/ConfigExamples/filteredFileOutput.yaml b/ConfigExamples/filteredFileOutput.yaml new file mode 100644 index 0000000..0dc68fe --- /dev/null +++ b/ConfigExamples/filteredFileOutput.yaml @@ -0,0 +1,21 @@ +collect: + contentTypes: + Audit.General: True + Audit.AzureActiveDirectory: True + Audit.SharePoint: True + skipKnownLogs: True + resume: True +# Collect logs concerning spoofing prevention in Audit.General, deleted files from Audit.SharePoint +# and login failures from Audit.AzureActiveDirectory +filter: + Audit.General: + Policy: Spoof + Audit.AzureActiveDirectory: + Operation: UserLoginFailed + Audit.SharePoint: + Operation: FileDeleted +# Output only to file +output: + file: + enabled: True + path: 'output.txt' \ No newline at end of file diff --git a/ConfigExamples/fullConfig.yaml b/ConfigExamples/fullConfig.yaml new file mode 100644 index 0000000..1a9faf0 --- /dev/null +++ b/ConfigExamples/fullConfig.yaml @@ -0,0 +1,38 @@ +log: # Log settings. Debug will severely decrease performance + path: 'collector.log' + debug: False +collect: # Settings determining which audit logs to collect and how to do it + contentTypes: + Audit.General: True + Audit.AzureActiveDirectory: True + Audit.Exchange: True + Audit.SharePoint: True + DLP.All: True + maxThreads: 50 + retries: 3 # Times to retry retrieving a content blob if it fails + retryCooldown: 3 # Seconds to wait before retrying retrieving a content blob + autoSubscribe: True # Automatically subscribe to collected content types. Never unsubscribes from anything. + skipKnownLogs: True # Remember retrieved log ID's, don't collect them twice + resume: True # Remember last run time, resume collecting from there next run + hoursToCollect: 24 # Look back this many hours for audit logs (can be overwritten by resume) +filter: # Only logs that match ALL filters for a content type are collected. 
Leave empty to collect all + Audit.General: + Audit.AzureActiveDirectory: + Audit.Exchange: + Audit.SharePoint: + DLP.All: +output: + file: + enabled: False + path: 'output.txt' + azureLogAnalytics: + enabled: False + workspaceId: + sharedKey: + graylog: + enabled: False + address: + port: + prtg: + enabled: False + channels: \ No newline at end of file diff --git a/ConfigExamples/graylog.yaml b/ConfigExamples/graylog.yaml new file mode 100644 index 0000000..13da73b --- /dev/null +++ b/ConfigExamples/graylog.yaml @@ -0,0 +1,14 @@ +collect: + contentTypes: + Audit.General: True + Audit.AzureActiveDirectory: True + Audit.Exchange: True + Audit.SharePoint: True + DLP.All: True + skipKnownLogs: True + resume: True +output: + graylog: + enabled: False + address: 172.16.1.1 + port: 5000 \ No newline at end of file diff --git a/ConfigExamples/prtg.yaml b/ConfigExamples/prtg.yaml new file mode 100644 index 0000000..4f5c0ed --- /dev/null +++ b/ConfigExamples/prtg.yaml @@ -0,0 +1,26 @@ +collect: + contentTypes: + Audit.General: True + Audit.AzureActiveDirectory: True + Audit.SharePoint: True + skipKnownLogs: False # Take all logs each time to count the number of active filter hits each interval + resume: False # Take all logs each time to count the number of active filter hits each interval + hoursToCollect: 1 # Period over which to alert, e.g. failed AAD logins over the last hour +# The PRTG output defines channels which have filters associated to them. The output of the channel will be +# the number of hits on the filter. E.g. filter for failed AAD logins on a "Failed AAD logins" channel. +output: + prtg: + enabled: True + channels: + - name: Deleted Sharepoint files + filters: + Audit.SharePoint: + Operation: FileDeleted + - name: Failed Azure AD logins + filters: + Audit.AzureActiveDirectory: + Operation: UserLoginFailed + - name: Spoof attempts prevented + filters: + Audit.General: + Policy: Spoof \ No newline at end of file diff --git a/Linux/AuditLogCollector b/Linux/AuditLogCollector deleted file mode 100644 index a6b819f..0000000 Binary files a/Linux/AuditLogCollector and /dev/null differ diff --git a/Linux/AuditLogSubscriber b/Linux/OfficeAuditLogCollector-V1.1 similarity index 96% rename from Linux/AuditLogSubscriber rename to Linux/OfficeAuditLogCollector-V1.1 index 2371bee..8db8bdf 100644 Binary files a/Linux/AuditLogSubscriber and b/Linux/OfficeAuditLogCollector-V1.1 differ diff --git a/README.md b/README.md index 69463d6..9f26865 100644 --- a/README.md +++ b/README.md @@ -1,28 +1,41 @@ -# Office365 API audit log collector - -Collect Office365 and Azure audit logs through their respective APIs. No prior knowledge of APIs is required, -onboarding and script usage is described below. There is a GUI for Windows. Currently supports the following outputs: -- Azure Analytics Workspace (OMS) -- Graylog (or any other source that accepts a simple socket connection) -- File - -Simply download the executable(s) you need from the Windows or Linux folder.: +# Office365 audit log collector + +Collect/retrieve Office365, Azure and DLP audit logs, optionally filter them, then send them to one or more outputs such as file, PRTG, Azure Log Analytics or Graylog. +Onboarding is easy and takes only a few minutes (steps described below). There are Windows and Linux executables, and an optional GUI for Windows only. +Easy configuration with a YAML config file (see the 'ConfigExamples' folder for reference). +If you have any issues or questions, feel free to create an issue in this repo. 
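If you would rather call the collector from your own Python code than use the executables, a minimal sketch based on the classes in the Source folder (this assumes the Source directory is on your import path; the credential values are placeholders for your own tenant ID, application/client ID and app secret):

```python
from AuditLogCollector import AuditLogCollector

# Placeholder credentials from your Azure app registration (see onboarding below).
collector = AuditLogCollector(
    tenant_id='xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx',
    client_key='xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx',
    secret_key='<app secret>')
collector.load_config(path='ConfigExamples/fileOutput.yaml')  # writes retrieved logs to output.txt
collector.init_logging()
collector.run_once()
```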
+- The following Audit logs can be extracted: + - Audit.General + - Audit.AzureActiveDirectory + - Audit.Exchange + - Audit.SharePoint + - DLP.All +- The following outputs are supported: + - Azure Analytics Workspace (OMS) + - PRTG Network Monitor + - Graylog (or any other source that accepts a simple socket connection) + - Local file + +Simply download the executable you need from the Windows or Linux folder and copy a config file from the ConfigExamples folder that suits your need: - Windows: - - GUI - Office Audit Log Collector.exe - - GUI for collecting audit logs AND subscribing to audit log feeds (see onboarding instructions below) - - Office Audit Log Collector.exe - - Command line tool for collecting audit logs (see syntax below) - - Office Audit Log Subscriber.exe - - Command line tool for subscribing to audit logs feeds (see onboarding instructions below) + - GUI-OfficeAuditLogCollector.exe + - GUI for collecting audit logs and subscribing to audit log feeds + - OfficeAuditLogCollector.exe + - Command line tool for collecting audit logs and (automatically) subscribing to audit log feeds - Linux: - OfficeAuditLogCollector - - Command line tool for collecting audit logs (see syntax below) - - OfficeAuditLogSubscriber - - Command line tool for subscribing to audit logs (see onboarding instructions below) + - Command line tool for collecting audit logs and (automatically) subscribing to audit log feeds + +Find onboarding instructions and more detailed instructions for using the executables below. + +For a full audit trail, schedule to run the collector on a regular basis (preferably at least once every day). Previously +retrieved logs can be remembered to prevent duplicates. Consider using the following parameters in the config file for a robust audit trail: +- skipKnownLogs: True (prevent duplicates) +- hoursToCollect: 24 (the maximum, or a number larger than the amount of hours between runs, for safety overlap) +- resume: False (don't resume where the last run stopped, have some overlap in case anything was missed for any reason) +See below for a more detailed instruction of the config file. -For a full audit trail schedule to run the script on a regular basis (preferably at least once every day). The last -run time is recorded automatically, so that when the script runs again it starts to retrieve audit logs from when it last ran. -Feel free to contribute other outputs if you happen to build any. +Lastly, feel free to contribute other outputs if you happen to build any. Also open to any other useful pull requests! See the following link for more info on the management APIs: https://msdn.microsoft.com/en-us/office-365/office-365-management-activity-api-reference. ## Roadmap: @@ -32,6 +45,9 @@ See the following link for more info on the management APIs: https://msdn.micros - Create a tutorial for automatic onboarding + docker container for the easiest way to run this ## Latest changes: +- Added PRTG output +- Added filters +- Added YAML config file - Added a GUI for Windows - Added executables for Windows and Linux - Added Azure Log Analytics Workspace OMS output @@ -48,94 +64,87 @@ See the following link for more info on the management APIs: https://msdn.micros - Ad-lib log retrieval; - Scheduling regular execution to retrieve the full audit trail. 
+- Output to PRTG for alerts on audit logs ## Features: -- Subscribe to the audit logs of your choice through the subscription script; +- Subscribe to the audit logs of your choice through the --interactive-subscriber switch, or automatically when collecting logs; - Collect General, Exchange, Sharepoint, Azure active directory and/or DLP audit logs through the collector script; -- Output to file or to a Graylog input (i.e. send the logs over a network socket) +- Output to file, PRTG, Azure Log Analytics or to a Graylog input (i.e. send the logs over a network socket). ## Requirements: - Office365 tenant; -- Azure app registration created for this script (see instructions) -- AzureAD tenant ID; -- Client key of the new Azure app registration; +- Azure app registration created for this script (see onboarding instructions) - Secret key (created in the new Azure app registration, see instructions); - App permissions to access the APIs for the new Azure application (see instructions); -- Subscription to the APIs of your choice (General/Sharepoint/Exchange/AzureAD/DLP, run AuditLogSubscription script and follow the instructions). +- Subscription to the APIs of your choice (use autoSubscribe option in the config file to automate this). ## Instructions: -### Onboarding: -- Create an app registration: - - Create the app registration itself under Azure AD (own tenant only works fine for single tenant) - - Create app secret (only shown once upon creation, store it somewhere safe) - - Grant your new app permissions to read the Office API's: - - Graph: AuditLog.Read.All - - Office 365 Management APIs: ActivityFeed.Read - - Office 365 Management APIs: ActivityFeed.ReadDlp +### Onboarding (one time only): - Make sure Auditing is turned on for your tenant! - - https://docs.microsoft.com/en-us/microsoft-365/compliance/turn-audit-log-search-on-or-off?view=o365-worldwide + - Use these instructions: https://docs.microsoft.com/en-us/microsoft-365/compliance/turn-audit-log-search-on-or-off?view=o365-worldwide - If you had to turn it on, it may take a few hours to process -- Use the 'AuditLogSubscriber' script to subscribe to the audit API's of your choice - - You will need tenant id, client key and secret key for this - - Simply follow the instructions -- You can now run the script and retrieve logs. - +- Create App registration: + - Azure AD > 'App registrations' > 'New registration': + - Choose any name for the registration + - Choose "Accounts in this organizational directory only (xyz only - Single tenant)" + - Hit 'register' + - Save 'Tenant ID' and 'Application (Client) ID' from the overview page of the new registration, you will need it to run the collector +- Create app secret: + - Azure AD > 'App registrations' > Click your new app registration > 'Certificates and secrets' > 'New client secret': + - Choose any name and expire date and hit 'add' + - Actual key is only shown once upon creation, store it somewhere safe. You will need it to run the collector. +- Grant your new app registration 'application' permissions to read the Office API's: + - Azure AD > 'App registrations' > Click your new app registration > 'API permissions' > 'Add permissions' > 'Office 365 Management APIs' > 'Application permissions': + - Check 'ActivityFeed.Read' + - Check 'ActivityFeed.ReadDlp' + - Hit 'Add permissions' +- Subscribe to audit log feeds of your choice + - Set 'autoSubscribe: True' in YAML config file to automate this. 
+ - OR Use the '--interactive-subscriber' parameter when executing the collector to manually subscribe to the audit API's of your choice +- You can now run the collector and retrieve logs. + + +### Running the collector: + +Running from the GUI should be self-explanatory. It can run once or on a schedule. Usually you will want to use the +command-line executable with a config file, and schedule it for periodic execution (e.g. through CRON, windows task +scheduler, or a PRTG sensor). + +To run the command-line executable use the following syntax: + +OfficeAuditLogCollector(.exe) %tenant_id% %client_key% %secret_key% --config %path/to/config.yaml% + +To create a config file you can start with the 'fullConfig.yaml' from the ConfigExamples folder. This has all the +possible options and some explanatory comments. Cross-reference with a config example using the output(s) of your choice, and you +should be set. ### (optional) Creating an Azure Log Analytics Workspace (OMS): -If you are running this script to get audit events in an Azure Analytics Workspace you will a Workspace ID and a shared key. -Create a workspace from "Create resource" in Azure (no configuration required). Then get the ID and key from "Agent management". -You do not need to prepare any tables or other settings. - +If you are running this script to get audit events in an Azure Analytics Workspace you will need a Workspace ID and a shared key. +- Create a workspace from "Create resource" in Azure (no configuration required); +- Get the ID and key from "Agent management"; +- You do not need to prepare any tables or other settings. + +### (optional) Creating a PRTG sensor + +To run with PRTG you must create a sensor: +- Copy the OfficeAuditLogCollector.exe executable to the "\Custom Sensors\EXE" sub folder of your PRTG installation +- Create a device in PRTG with any host name (e.g. "Office Audit Logs") +- Create a 'EXE/Script Advanced Sensor' on that device and choose the executable you just copied +- Enter parameters, e.g.: "*tenant_id* *client_key* *secret_key* --config *full/path/to/config.yaml*" +(use full path, because PRTG will execute the script from a different working directory) +- Copy the prtg.config from ConfigExamples and modify at least the channel names and filters for your needs. +- Set the timeout of the script to something generous that suits the amount of logs you will retrieve. +Probably at least 300 seconds. Run the script manually first to check how long it takes. +- Match the interval of the sensor to the amount of hours of logs to retrieve. If your interval is 1 hour, hoursToCollect +in the config file should also be set to one hour. ### (optional) Creating a Graylog input -If you are running this script to get audit events in Graylog you will need to create a Graylog input. If not, just skip this. - +If you are running this script to get audit events in Graylog you will need to create a Graylog input. - Create a 'raw/plaintext TCP' input; - Enter the IP and port you want to receive the logs on (you can use these in the script); - All other settings can be left default. 
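If you want to verify the socket output without a full Graylog server, a minimal sketch of a plain TCP listener works, since the collector simply opens one connection per audit log and closes it after sending (this assumes the graylog output in your config points at 127.0.0.1 port 5000):

```python
import socket

# Minimal test listener for the socket/Graylog output.
server = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
server.bind(('127.0.0.1', 5000))
server.listen()
while True:
    connection, _ = server.accept()   # the collector opens a new connection per audit log
    chunks = []
    while True:
        data = connection.recv(4096)
        if not data:                  # connection is closed after each message
            break
        chunks.append(data)
    connection.close()
    print(b''.join(chunks).decode())  # each message should be a single serialized audit log
```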
- -### Running the script: - -- Retrieve all logs and send to a network socket / Graylog server: -`python3 AuditLogCollector.py 'tenant_id' 'client_key' 'secret_key' --exchange --dlp --azure_ad --general --sharepoint -p 'random_publisher_id' -g -gA 10.10.10.1 -gP 6000` - -#### Script options: -``` -usage: AuditLogCollector.py [-h] [--general] [--exchange] [--azure_ad] - [--sharepoint] [--dlp] [-p publisher_id] - [-l log_path] [-f] [-fP file_output_path] [-g] - [-gA graylog_address] [-gP graylog_port] - tenant_id client_key secret_key` - -positional arguments: - tenant_id Tenant ID of Azure AD - client_key Client key of Azure application - secret_key Secret key generated by Azure application` - -optional arguments: - -h, --help show this help message and exit - --general Retrieve General content - --exchange Retrieve Exchange content - --azure_ad Retrieve Azure AD content - --sharepoint Retrieve SharePoint content - --dlp Retrieve DLP content - -r Resume looking for content from last run time for each content type (takes precedence over -tH and -tD) - -tH Number of hours to to go back and look for content - -tD Number of days to to go back and look for content - -p publisher_id Publisher GUID to avoid API throttling - -l log_path Path of log file - -f Output to file. - -fP file_output_path Path of directory of output files - -a Output to Azure Log Analytics workspace - -aC ID of log analytics workspace. - -aS Shared key of log analytics workspace. - -g Output to graylog. - -gA graylog_address Address of graylog server. - -gP graylog_port Port of graylog server. - -d Enable debug logging (large log files and lower performance) -``` \ No newline at end of file diff --git a/Source/ApiConnection.py b/Source/ApiConnection.py index f445e17..b86e6af 100644 --- a/Source/ApiConnection.py +++ b/Source/ApiConnection.py @@ -6,7 +6,7 @@ class ApiConnection(object): - def __init__(self, tenant_id=None, client_key=None, secret_key=None, publisher_id=None): + def __init__(self, tenant_id=None, client_key=None, secret_key=None, publisher_id=None, **kwargs): """ Object that creates the authorization headers for- and sends API requests to the Microsoft Office APIs'. Taken from a Microsoft sample script that I cannot find the original of to reference. 
@@ -43,7 +43,8 @@ def login(self): self.client_key, urllib.parse.quote(self.secret_key), resource) r = requests.post(auth_url, headers=headers, data=data, verify=True) resp = r.json() - + if not self.publisher_id: + self.publisher_id = self.tenant_id try: headers['Authorization'] = 'bearer ' + resp['access_token'] logging.log(level=logging.DEBUG, msg='Logged in') diff --git a/Source/AuditLogCollector.py b/Source/AuditLogCollector.py index 7611cd3..b3f269c 100644 --- a/Source/AuditLogCollector.py +++ b/Source/AuditLogCollector.py @@ -1,95 +1,268 @@ -# Standard libs +from Interfaces import AzureOMSInterface, GraylogInterface, PRTGInterface +import AuditLogSubscriber +import ApiConnection import os import sys +import yaml +import time import json import logging import datetime import argparse -import dateutil.parser import collections import threading -# Internal libs -import AzureOMSInterface -import GraylogInterface -import ApiConnection class AuditLogCollector(ApiConnection.ApiConnection): - def __init__(self, *args, content_types=None, resume=True, fallback_time=None, - file_output=False, output_path=None, - graylog_output=False, graylog_address=None, graylog_port=None, - azure_oms_output=False, azure_oms_workspace_id=None, azure_oms_shared_key=None, - **kwargs): + def __init__(self, content_types=None, resume=True, fallback_time=None, skip_known_logs=True, + log_path='collector.log', debug=False, auto_subscribe=False, max_threads=20, retries=3, + retry_cooldown=3, file_output=False, output_path=None, graylog_output=False, azure_oms_output=False, + prtg_output=False, **kwargs): """ Object that can retrieve all available content blobs for a list of content types and then retrieve those blobs and output them to a file or Graylog input (i.e. send over a socket). :param content_types: list of content types to retrieve (e.g. 
'Audit.Exchange', 'Audit.Sharepoint') :param resume: Resume from last known run time for each content type (Bool) :param fallback_time: if no last run times are found to resume from, run from this start time (Datetime) + :param retries: Times to retry retrieving a content blob if it fails (int) + :param retry_cooldown: Seconds to wait before retrying retrieving a content blob (int) + :param skip_known_logs: record retrieved content blobs and log ids, skip them next time (Bool) :param file_output: path of file to output audit logs to (str) + :param log_path: path of file to log to (str) + :param debug: enable debug logging (Bool) + :param auto_subscribe: automatically subscribe to audit log feeds for which content is retrieved (Bool) :param output_path: path to output retrieved logs to (None=no file output) (string) :param graylog_output: Enable graylog Interface (Bool) - :param graylog_address: IP/Hostname of Graylog server to output audit logs to (str) - :param graylog_port: port of Graylog server to output audit logs to (int) :param azure_oms_output: Enable Azure workspace analytics OMS Interface (Bool) - :param azure_oms_workspace_id: Found under "Agent Configuration" blade in Portal (str) - :param azure_oms_shared_key: Found under "Agent Configuration" blade in Portal(str) + :param prtg_output: Enable PRTG output (Bool) """ - super().__init__(*args, **kwargs) + super().__init__(**kwargs) + self.content_types = content_types or collections.deque() + self.resume = resume + self._fallback_time = fallback_time or datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta( + hours=23) + self.retries = retries + self.retry_cooldown = retry_cooldown + self.skip_known_logs = skip_known_logs + self.log_path = log_path + self.debug = debug + self.auto_subscribe = auto_subscribe + self.filters = {} + self.file_output = file_output - self.graylog_output = graylog_output - self.azure_oms_output = azure_oms_output self.output_path = output_path - self.content_types = content_types or collections.deque() + self.azure_oms_output = azure_oms_output + self.azure_oms_interface = AzureOMSInterface.AzureOMSInterface(**kwargs) + self.graylog_output = graylog_output + self.graylog_interface = GraylogInterface.GraylogInterface(**kwargs) + self.prtg_output = prtg_output + self.prtg_interface = PRTGInterface.PRTGInterface(**kwargs) + self.max_threads = max_threads + self._last_run_times = {} - self.resume = resume - if resume: - self.get_last_run_times() - self._fallback_time = fallback_time or datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(days=1) self._known_content = {} - self._azure_oms_interface = AzureOMSInterface.AzureOMSInterface(workspace_id=azure_oms_workspace_id, - shared_key=azure_oms_shared_key) - self._graylog_interface = GraylogInterface.GraylogInterface(graylog_address=graylog_address, - graylog_port=graylog_port) + self._known_logs = {} + self.blobs_to_collect = collections.defaultdict(collections.deque) self.monitor_thread = threading.Thread() self.retrieve_available_content_threads = collections.deque() self.retrieve_content_threads = collections.deque() self.run_started = None self.logs_retrieved = 0 + self.errors_retrieving = 0 - def run_once(self, start_time=None): + @property + def all_content_types(self): """ - Check available content and retrieve it, then exit. 
+ :return: list of str + """ + return ['Audit.General', 'Audit.AzureActiveDirectory', 'Audit.Exchange', 'Audit.SharePoint', 'DLP.All'] + + def load_config(self, path): + """ + Load a YML config containing settings for this collector and its' interfaces. + :param path: str + """ + with open(path, 'r') as ofile: + config = yaml.safe_load(ofile) + self._load_log_config(config=config) + self._load_collect_config(config=config) + self._load_filter_config(config=config) + self._load_output_config(config=config) + + def _load_log_config(self, config): + """ + :param config: str + """ + if 'log' in config['collect']: + if 'path' in config['collect']['log']: + self.log_path = config['collect']['log']['path'] + if 'debug' in config['collect']['log']: + self.debug = config['collect']['log']['path'] + + def _load_collect_config(self, config): + """ + :param config: str + """ + if 'collect' in config: + if 'contentTypes' in config['collect']: + self.content_types = [ + x for x in self.all_content_types if x in config['collect']['contentTypes'] and + config['collect']['contentTypes'][x] is True] + if 'maxThreads' in config['collect']: + self.max_threads = config['collect']['maxThreads'] + if 'retries' in config['collect']: + self.retries = config['collect']['retries'] + if 'retryCooldown' in config['collect']: + self.retry_cooldown = config['collect']['retryCooldown'] + if 'autoSubscribe' in config['collect']: + self.auto_subscribe = config['collect']['autoSubscribe'] + if 'skipKnownLogs' in config['collect']: + self.skip_known_logs = config['collect']['skipKnownLogs'] + if 'resume' in config['collect']: + self.resume = config['collect']['resume'] + if 'hoursToCollect' in config['collect']: + self._fallback_time = datetime.datetime.now(datetime.timezone.utc) -\ + datetime.timedelta(hours=config['collect']['hoursToCollect']) + + def _load_filter_config(self, config): + """ + :param config: str + """ + if 'filter' in config and config['filter']: + self.filters = config['filter'] + + def _load_output_config(self, config): + """ + :param config: str + """ + if 'output' in config: + self._load_file_output_config(config=config) + self._load_azure_log_analytics_output_config(config=config) + self._load_graylog_output_config(config=config) + self._load_prtg_output_config(config=config) + + def _load_file_output_config(self, config): + """ + :param config: str + """ + if 'file' in config['output']: + if 'enabled' in config['output']['file']: + self.file_output = config['output']['file']['enabled'] + if 'path' in config['output']['file']: + self.output_path = config['output']['file']['path'] + + def _load_azure_log_analytics_output_config(self, config): + """ + :param config: str """ - self._known_content.clear() + if 'azureLogAnalytics' in config['output']: + if 'enabled' in config['output']['azureLogAnalytics']: + self.azure_oms_output = config['output']['azureLogAnalytics']['enabled'] + if 'workspaceId' in config['output']['azureLogAnalytics']: + self.azure_oms_interface.workspace_id = config['output']['azureLogAnalytics']['workspaceId'] + if 'sharedKey' in config['output']['azureLogAnalytics']: + self.azure_oms_interface.shared_key = config['output']['azureLogAnalytics']['sharedKey'] + + def _load_graylog_output_config(self, config): + """ + :param config: str + """ + if 'graylog' in config['output']: + if 'enabled' in config['output']['graylog']: + self.graylog_output = config['output']['graylog']['enabled'] + if 'address' in config['output']['graylog']: + self.graylog_interface.gl_address = 
config['output']['graylog']['address'] + if 'port' in config['output']['graylog']: + self.graylog_interface.gl_port = config['output']['graylog']['port'] + + def _load_prtg_output_config(self, config): + """ + :param config: str + """ + if 'prtg' in config['output']: + if 'enabled' in config['output']['prtg']: + self.prtg_output = config['output']['prtg']['enabled'] + self.prtg_interface.config = config['output']['prtg'] + + def init_logging(self): + """ + Start logging to file and console. If PRTG output is enabled do not log to console, as this will interfere with + the sensor result. + """ + logger = logging.getLogger() + file_handler = logging.FileHandler(self.log_path, mode='w') + if not self.prtg_output: + stream_handler = logging.StreamHandler(sys.stdout) + logger.addHandler(stream_handler) + logger.addHandler(file_handler) + logger.setLevel(logging.INFO if not self.debug else logging.DEBUG) + + def _prepare_to_run(self): + """ + Make sure that self.run_once can be called multiple times by resetting to an initial state. + """ + if self.auto_subscribe: + self._auto_subscribe() + if self.resume: + self._get_last_run_times() + if self.skip_known_logs: + self._known_content.clear() + self._known_logs.clear() + self._clean_known_content() + self._clean_known_logs() self.logs_retrieved = 0 - self._graylog_interface.successfully_sent = 0 - self._graylog_interface.unsuccessfully_sent = 0 - self._azure_oms_interface.successfully_sent = 0 - self._azure_oms_interface.unsuccessfully_sent = 0 + self.graylog_interface.successfully_sent = 0 + self.graylog_interface.unsuccessfully_sent = 0 + self.azure_oms_interface.successfully_sent = 0 + self.azure_oms_interface.unsuccessfully_sent = 0 self.run_started = datetime.datetime.now() - self._clean_known_content() - if self.resume: - self.get_last_run_times() - self.start_monitoring() - self.get_all_available_content(start_time=start_time) + + def run_once(self, start_time=None): + """ + Check available content and retrieve it, then exit. + """ + logging.log(level=logging.INFO, msg='Starting run @ {}. Content: {}.'.format( + datetime.datetime.now(), self.content_types)) + self._prepare_to_run() + self._start_monitoring() + self._get_all_available_content(start_time=start_time) self.monitor_thread.join() + self._finish_run() + + def _finish_run(self): + """ + Save relevant information and output PRTG result if the interface is enabled. The other interfaces output + while collecting. + """ + if self.skip_known_logs: + self._add_known_log() + self._add_known_content() if self.resume and self._last_run_times: with open('last_run_times', 'w') as ofile: json.dump(fp=ofile, obj=self._last_run_times) - logging.info("Finished. Total logs retrieved: {}. Run time: {}.".format( - self.logs_retrieved, datetime.datetime.now() - self.run_started)) + if self.prtg_output: + self.prtg_interface.output() + self._log_statistics() + + def _log_statistics(self): + """ + Write run statistics to log file / console. + """ + logging.info("Finished. Total logs retrieved: {}. Total logs with errors: {}. 
Run time: {}.".format( + self.logs_retrieved, self.errors_retrieving, datetime.datetime.now() - self.run_started)) if self.azure_oms_output: logging.info("Azure OMS output report: {} successfully sent, {} errors".format( - self._azure_oms_interface.successfully_sent, self._azure_oms_interface.unsuccessfully_sent)) + self.azure_oms_interface.successfully_sent, self.azure_oms_interface.unsuccessfully_sent)) if self.graylog_output: logging.info("Graylog output report: {} successfully sent, {} errors".format( - self._graylog_interface.successfully_sent, self._graylog_interface.unsuccessfully_sent)) - - def get_last_run_times(self): + self.graylog_interface.successfully_sent, self.graylog_interface.unsuccessfully_sent)) + def _get_last_run_times(self): + """ + Load last_run_times file and interpret the datetime for each content type. + """ if os.path.exists('last_run_times'): try: with open('last_run_times', 'r') as ofile: @@ -105,7 +278,10 @@ def get_last_run_times(self): @property def done_retrieving_content(self): - + """ + Returns True if there are no more content blobs to be collected. Used to determine when to exit the script. + :return: Bool + """ for content_type in self.blobs_to_collect: if self.blobs_to_collect[content_type]: return False @@ -120,18 +296,34 @@ def done_collecting_available_content(self): """ return not bool(self.content_types) - def start_monitoring(self): - - self.monitor_thread = threading.Thread(target=self.monitor_blobs_to_collect, daemon=True) + def _start_monitoring(self): + """ + Start a thread monitoring the list containing blobs that need collecting. + """ + self.monitor_thread = threading.Thread(target=self._monitor_blobs_to_collect, daemon=True) self.monitor_thread.start() - def stop_monitoring(self): - - self.monitor_thread.join() + def _auto_subscribe(self): + """ + Subscribe to all content types that are set to be retrieved. + """ + subscriber = AuditLogSubscriber.AuditLogSubscriber(tenant_id=self.tenant_id, client_key=self.client_key, + secret_key=self.secret_key) + status = subscriber.get_sub_status() + if status == '': + raise RuntimeError("Auto subscribe enabled but could not get subscription status") + unsubscribed_content_types = self.content_types.copy() + for s in status: + if s['contentType'] in self.content_types and s['status'].lower() == 'enabled': + unsubscribed_content_types.remove(s['contentType']) + for content_type in unsubscribed_content_types: + logging.info("Auto subscribing to: {}".format(content_type)) + subscriber.set_sub_status(content_type=content_type, action='start') - def get_all_available_content(self, start_time=None): + def _get_all_available_content(self, start_time=None): """ - Make a call to retrieve avaialble content blobs for a content type in a thread. + Start a thread to retrieve available content blobs for each content type to be collected. 
+ :param start_time: DateTime """ for content_type in self.content_types.copy(): if not start_time: @@ -140,20 +332,22 @@ def get_all_available_content(self, start_time=None): else: start_time = self._fallback_time self.retrieve_available_content_threads.append(threading.Thread( - target=self.get_available_content, daemon=True, + target=self._get_available_content, daemon=True, kwargs={'content_type': content_type, 'start_time': start_time})) self.retrieve_available_content_threads[-1].start() - def get_available_content(self, content_type, start_time): + def _get_available_content(self, content_type, start_time): """ - Make a call to retrieve avaialble content blobs for a content type in a thread. If the response contains a + Retrieve available content blobs for a content type. If the response contains a 'NextPageUri' there is more content to be retrieved; rerun until all has been retrieved. """ try: - logging.log(level=logging.DEBUG, msg='Getting available content for type: "{0}"'.format(content_type)) + logging.log(level=logging.DEBUG, msg='Getting available content for type: "{}"'.format(content_type)) current_time = datetime.datetime.now(datetime.timezone.utc) formatted_end_time = str(current_time).replace(' ', 'T').rsplit('.', maxsplit=1)[0] formatted_start_time = str(start_time).replace(' ', 'T').rsplit('.', maxsplit=1)[0] + logging.info("Retrieving {}. Start time: {}. End time: {}.".format( + content_type, formatted_start_time, formatted_end_time)) response = self.make_api_request(url='subscriptions/content?contentType={0}&startTime={1}&endTime={2}'.format( content_type, formatted_start_time, formatted_end_time)) self.blobs_to_collect[content_type] += response.json() @@ -171,15 +365,30 @@ def get_available_content(self, content_type, start_time): self.content_types.remove(content_type) self._last_run_times[content_type] = start_time.strftime("%Y-%m-%dT%H:%M:%SZ") - def monitor_blobs_to_collect(self): + def _start_interfaces(self): + + if self.azure_oms_output: + self.azure_oms_interface.start() + if self.prtg_output: + self.prtg_interface.start() + if self.graylog_output: + self.graylog_interface.start() + + def _stop_interfaces(self): + + if self.azure_oms_output: + self.azure_oms_interface.stop() + if self.prtg_output: + self.prtg_interface.stop() + if self.graylog_output: + self.graylog_interface.stop() + + def _monitor_blobs_to_collect(self): """ Wait for the 'retrieve_available_content' function to retrieve content URI's. Once they become available start retrieving in a background thread. 
""" - if self.azure_oms_output: - self._azure_oms_interface.start() - if self.graylog_output: - self._graylog_interface.start() + self._start_interfaces() threads = collections.deque() while True: threads = [thread for thread in threads if thread.is_alive()] @@ -188,77 +397,187 @@ def monitor_blobs_to_collect(self): if not self.blobs_to_collect: continue for content_type, blobs_to_collect in self.blobs_to_collect.copy().items(): + if len(threads) >= self.max_threads: + break if self.blobs_to_collect[content_type]: blob_json = self.blobs_to_collect[content_type].popleft() - if blob_json and 'contentUri' in blob_json: - logging.log(level=logging.DEBUG, msg='Retrieving content blob: "{0}"'.format(blob_json)) - threads.append(threading.Thread( - target=self.retrieve_content, daemon=True, - kwargs={'content_json': blob_json, 'content_type': content_type})) - threads[-1].start() - if self.azure_oms_output: - self._azure_oms_interface.stop() - if self.graylog_output: - self._graylog_interface.stop() + self._collect_blob(blob_json=blob_json, content_type=content_type, threads=threads) + self._stop_interfaces() - def retrieve_content(self, content_json, content_type): + def _collect_blob(self, blob_json, content_type, threads): + """ + Collect a single content blob in a thread. + :param blob_json: JSON + :param content_type: str + :param threads: list + """ + if blob_json and 'contentUri' in blob_json: + logging.log(level=logging.DEBUG, msg='Retrieving content blob: "{0}"'.format(blob_json)) + threads.append(threading.Thread( + target=self._retrieve_content, daemon=True, + kwargs={'content_json': blob_json, 'content_type': content_type, 'retries': self.retries})) + threads[-1].start() + + def _retrieve_content(self, content_json, content_type, retries): """ Get an available content blob. If it exists in the list of known content blobs it is skipped to ensure idempotence. :param content_json: JSON dict of the content blob as retrieved from the API (dict) :param content_type: Type of API being retrieved for, e.g. 
'Audit.Exchange' (str) - :return: + :param retries: Times to retry retrieving a content blob if it fails (int) """ - if self.known_content and content_json['contentId'] in self.known_content: + if self.skip_known_logs and self.known_content and content_json['contentId'] in self.known_content: return try: - result = self.make_api_request(url=content_json['contentUri'], append_url=False).json() - if not result: + results = self.make_api_request(url=content_json['contentUri'], append_url=False).json() + if not results: return except Exception as e: - logging.error("Error retrieving content: {}".format(e)) - return + if retries: + time.sleep(self.retry_cooldown) + return self._retrieve_content(content_json=content_json, content_type=content_type, retries=retries - 1) + else: + self.errors_retrieving += 1 + logging.error("Error retrieving content: {}".format(e)) + return else: - self.logs_retrieved += len(result) - self._add_known_content(content_id=content_json['contentId'], - content_expiration=content_json['contentExpiration']) - if self.file_output: - self.output_results_to_file(results=result) - if self.graylog_output: - self._graylog_interface.send_messages_to_graylog(*result) - if self.azure_oms_output: - self._azure_oms_interface.send_messages_to_oms(*result, content_type=content_type) - - def output_results_to_file(self, results): + self._handle_retrieved_content(content_json=content_json, content_type=content_type, results=results) + + def _handle_retrieved_content(self, content_json, content_type, results): + """ + Check known logs, filter results and output what remains. + :param content_json: JSON dict of the content blob as retrieved from the API (dict) + :param content_type: Type of API being retrieved for, e.g. 'Audit.Exchange' (str) + :param results: list of JSON + """ + + if self.skip_known_logs: + self._known_content[content_json['contentId']] = content_json['contentExpiration'] + for log in results.copy(): + if self.skip_known_logs: + if log['Id'] in self.known_logs: + results.remove(log) + continue + self.known_logs[log['Id']] = log['CreationTime'] + if self.filters and not self._check_filters(log=log, content_type=content_type): + results.remove(log) + self.logs_retrieved += len(results) + self._output_results(results=results, content_type=content_type) + + def _output_results(self, results, content_type): + """ + :param content_type: Type of API being retrieved for, e.g. 'Audit.Exchange' (str) + :param results: list of JSON + """ + if self.file_output: + self._output_results_to_file(*results) + if self.prtg_output: + self.prtg_interface.send_messages(*results, content_type=content_type) + if self.graylog_output: + self.graylog_interface.send_messages(*results, content_type=content_type) + if self.azure_oms_output: + self.azure_oms_interface.send_messages(*results, content_type=content_type) + + def _check_filters(self, log, content_type): + """ + :param log: JSON + :param content_type: Type of API being retrieved for, e.g. 'Audit.Exchange' (str) + :return: True if log matches filter, False if not (Bool) + """ + if content_type in self.filters and self.filters[content_type]: + for log_filter_key, log_filter_value in self.filters[content_type].items(): + if log_filter_key not in log or log[log_filter_key].lower() != log_filter_value.lower(): + return False + return True + + def _output_results_to_file(self, *results): """ Dump received JSON messages to a file. 
:param results: retrieved JSON (dict) """ - with open(self.output_path, 'a') as ofile: - ofile.write("{}\n".format(json.dump(obj=results, fp=ofile))) + for result in results: + with open(self.output_path, 'a') as ofile: + ofile.write("{}\n".format(json.dumps(obj=result))) - def _add_known_content(self, content_id, content_expiration): + def _add_known_log(self): """ Add a content ID to the known content file to avoid saving messages more than once. - :param content_id: string - :param content_expiration: date string :return: """ - with open('known_content', 'a') as ofile: - ofile.write('\n{0},{1}'.format(content_id, content_expiration)) + with open('known_logs', 'w') as ofile: + for log_id, creation_time in self.known_logs.items(): + ofile.write('{},{}\n'.format(log_id, creation_time)) + + def _add_known_content(self): + """ + Add a content ID to the known content file to avoid saving messages more than once. + :return: + """ + with open('known_content', 'w') as ofile: + for content_id, content_expiration in self.known_content.items(): + ofile.write('{0},{1}\n'.format(content_id, content_expiration)) + + def _clean_known_logs(self): + """ + Remove any known content ID's that have expired. Can't download a duplicate if it is not available for + download. + """ + known_logs = self.known_logs + if os.path.exists('known_logs'): + os.remove('known_logs') + for log_id, creation_time in known_logs.copy().items(): + try: + date = datetime.datetime.strptime(creation_time.strip()+'Z', "%Y-%m-%dT%H:%M:%S%z") + expire_date = date + datetime.timedelta(days=7) + if not datetime.datetime.now(datetime.timezone.utc) < expire_date: + del self.known_logs[log_id] + except Exception as e: + logging.debug("Could not parse known logs: {}".format(e)) + del self.known_logs[log_id] + if not known_logs: + return + with open('known_logs', mode='w') as ofile: + for log_id, creation_time in known_logs.items(): + ofile.write("{},{}\n".format(log_id, creation_time.strip())) def _clean_known_content(self): """ Remove any known content ID's that have expired. Can't download a duplicate if it is not available for download. """ + known_content = self.known_content if os.path.exists('known_content'): - known_contents = self.known_content os.remove('known_content') - for id, expire_date in known_contents.items(): - date = dateutil.parser.parse(expire_date) - if datetime.datetime.now(datetime.timezone.utc) < date: - self._add_known_content(content_id=id, content_expiration=expire_date) + for content_id, expire_date in known_content.copy().items(): + try: + date = datetime.datetime.strptime(expire_date, "%Y-%m-%dT%H:%M:%S.%f%z") + if not datetime.datetime.now(datetime.timezone.utc) < date: + del known_content[content_id] + except Exception as e: + logging.debug("Could not parse known content: {}".format(e)) + del known_content[content_id] + if not known_content: + return + with open('known_logs', 'w') as ofile: + for content_id, expire_date in known_content.items(): + ofile.write("{},{}\n".format(content_id, expire_date)) + + @property + def known_logs(self): + """ + Parse and return known content file. 
+ :return: {content_id: content_expiration_date} dict + """ + if not self._known_logs and os.path.exists('known_logs'): + with open('known_logs', 'r') as ofile: + for line in ofile.readlines(): + if not line.strip(): + continue + try: + self._known_logs[line.split(',')[0].strip()] = line.split(',')[1] + except: + continue + return self._known_logs @property def known_content(self): @@ -266,11 +585,11 @@ def known_content(self): Parse and return known content file. :return: {content_id: content_expiration_date} dict """ - if not os.path.exists('known_content'): - return - if not self._known_content: + if not self._known_content and os.path.exists('known_content'): with open('known_content', 'r') as ofile: for line in ofile.readlines(): + if not line.strip(): + continue try: self._known_content[line.split(',')[0].strip()] = line.split(',')[1] except: @@ -290,6 +609,10 @@ def known_content(self): parser.add_argument('tenant_id', type=str, help='Tenant ID of Azure AD', action='store') parser.add_argument('client_key', type=str, help='Client key of Azure application', action='store') parser.add_argument('secret_key', type=str, help='Secret key generated by Azure application', action='store') + parser.add_argument('--config', metavar='config', type=str, help='Path to YAML config file', + action='store', dest='config') + parser.add_argument('--interactive-subscriber', action='store_true', + help='Manually (un)subscribe to audit log feeds', dest='interactive_subscriber') parser.add_argument('--general', action='store_true', help='Retrieve General content', dest='general') parser.add_argument('--exchange', action='store_true', help='Retrieve Exchange content', dest='exchange') parser.add_argument('--azure_ad', action='store_true', help='Retrieve Azure AD content', dest='azure_ad') @@ -303,14 +626,18 @@ def known_content(self): '-tH and -tD)', action='store_true', dest='resume') parser.add_argument('-tH', metavar='time_hours', type=int, help='Amount of hours to go back and look for content', action='store', dest='time_hours') - parser.add_argument('-tD', metavar='time_days', type=int, help='Amount of days to go back and look for content', - action='store', dest='time_days') + parser.add_argument('-s', + help='Keep track of each retrieved log ID and skip it in the future to prevent duplicates', + action='store_true', dest='skip_known_logs') parser.add_argument('-l', metavar='log_path', type=str, help='Path of log file', action='store', dest='log_path', default=os.path.join(os.path.dirname(__file__), 'AuditLogCollector.log')) + parser.add_argument('-d', action='store_true', dest='debug_logging', + help='Enable debug logging (generates large log files and decreases performance).') parser.add_argument('-f', help='Output to file.', action='store_true', dest='file') parser.add_argument('-fP', metavar='file_output_path', type=str, help='Path of directory of output files', default=os.path.join(os.path.dirname(__file__), 'output'), action='store', dest='output_path') + parser.add_argument('-P', help='Output to PRTG with PrtgConfig.yaml.', action='store_true', dest='prtg') parser.add_argument('-a', help='Output to Azure Log Analytics workspace.', action='store_true', dest='azure') parser.add_argument('-aC', metavar='azure_workspace', type=str, help='ID of log analytics workspace.', action='store', dest='azure_workspace') @@ -321,11 +648,15 @@ def known_content(self): dest='graylog_addr') parser.add_argument('-gP', metavar='graylog_port', type=str, help='Port of graylog server.', action='store', 
dest='graylog_port') - parser.add_argument('-d', action='store_true', dest='debug_logging', - help='Enable debug logging (generates large log files and decreases performance).') args = parser.parse_args() argsdict = vars(args) + if argsdict['interactive_subscriber']: + subscriber = AuditLogSubscriber.AuditLogSubscriber( + tenant_id=argsdict['tenant_id'], secret_key=argsdict['secret_key'], client_key=argsdict['client_key']) + subscriber.interactive() + quit(0) + content_types = [] if argsdict['general']: content_types.append('Audit.General') @@ -339,28 +670,22 @@ def known_content(self): content_types.append('DLP.All') fallback_time = None - if argsdict['time_days']: - fallback_time = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(days=argsdict['time_days']) - elif argsdict['time_hours']: + if argsdict['time_hours']: fallback_time = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(days=argsdict['time_hours']) - logger = logging.getLogger() - fileHandler = logging.FileHandler(argsdict['log_path'], mode='w') - streamHandler = logging.StreamHandler(sys.stdout) - logger.addHandler(streamHandler) - logger.addHandler(fileHandler) - logger.setLevel(logging.INFO if not argsdict['debug_logging'] else logging.DEBUG) - logging.log(level=logging.INFO, msg='Starting run @ {0}'.format(datetime.datetime.now())) - collector = AuditLogCollector( tenant_id=argsdict['tenant_id'], secret_key=argsdict['secret_key'], client_key=argsdict['client_key'], content_types=content_types, publisher_id=argsdict['publisher_id'], resume=argsdict['resume'], - fallback_time=fallback_time, - file_output=argsdict['file'], output_path=argsdict['output_path'], - azure_oms_output=argsdict['azure'], azure_oms_workspace_id=argsdict['azure_workspace'], - azure_oms_shared_key=argsdict['azure_key'], - graylog_address=argsdict['graylog_addr'], graylog_port=argsdict['graylog_port'], + fallback_time=fallback_time, skip_known_logs=argsdict['skip_known_logs'], log_path=argsdict['log_path'], + file_output=argsdict['file'], output_path=argsdict['output_path'], debug=argsdict['debug_logging'], + prtg_output=argsdict['prtg'], + azure_oms_output=argsdict['azure'], workspace_id=argsdict['azure_workspace'], + shared_key=argsdict['azure_key'], + gl_address=argsdict['graylog_addr'], gl_port=argsdict['graylog_port'], graylog_output=argsdict['graylog']) + if argsdict['config']: + collector.load_config(path=argsdict['config']) + collector.init_logging() collector.run_once() diff --git a/Source/GUI.py b/Source/GUI.py index a99fb2a..6053d79 100644 --- a/Source/GUI.py +++ b/Source/GUI.py @@ -5,11 +5,19 @@ from kivy.clock import Clock from kivy.properties import StringProperty import os +import sys import json import time import logging import threading import datetime +root_path = os.path.split(__file__)[0] +if getattr(sys, 'frozen', False): + icon_path = os.path.join(sys._MEIPASS, os.path.join(root_path, "icon.ico")) +else: + icon_path = os.path.join(root_path, "icon.ico") +Config.set('kivy','window_icon', icon_path) +Config.set('graphics', 'resizable', False) Config.set('graphics', 'width', '450') Config.set('graphics', 'height', '600') Config.write() @@ -84,13 +92,13 @@ def _update_run_statistics(self): prefix.ids.collector_widget.ids.run_time.text = str(datetime.datetime.now() - self.collector.run_started) prefix.ids.collector_widget.ids.retrieved_label.text = str(self.collector.logs_retrieved) prefix.ids.collector_widget.ids.azure_sent_label.text = str( - self.collector._azure_oms_interface.successfully_sent) + 
self.collector.azure_oms_interface.successfully_sent) prefix.ids.collector_widget.ids.azure_error_label.text = str( - self.collector._azure_oms_interface.unsuccessfully_sent) + self.collector.azure_oms_interface.unsuccessfully_sent) prefix.ids.collector_widget.ids.graylog_sent_label.text = str( - self.collector._graylog_interface.successfully_sent) + self.collector.graylog_interface.successfully_sent) prefix.ids.collector_widget.ids.graylog_error_label.text = str( - self.collector._graylog_interface.unsuccessfully_sent) + self.collector.graylog_interface.unsuccessfully_sent) def run_once(self): @@ -139,10 +147,10 @@ def _prepare_to_run(self): self.collector.output_path = prefix.ids.config_widget.ids.file_output_path.text self.collector.azure_oms_output = prefix.ids.config_widget.ids.oms_output_switch.active self.collector.graylog_output = prefix.ids.config_widget.ids.graylog_output_switch.active - self.collector._azure_oms_interface.workspace_id = prefix.ids.config_widget.ids.oms_id.text - self.collector._azure_oms_interface.shared_key = prefix.ids.config_widget.ids.oms_key.text - self.collector._graylog_interface.gl_address = prefix.ids.config_widget.ids.graylog_ip.text - self.collector._graylog_interface.gl_port = prefix.ids.config_widget.ids.graylog_port.text + self.collector.azure_oms_interface.workspace_id = prefix.ids.config_widget.ids.oms_id.text + self.collector.azure_oms_interface.shared_key = prefix.ids.config_widget.ids.oms_key.text + self.collector.graylog_interface.gl_address = prefix.ids.config_widget.ids.graylog_ip.text + self.collector.graylog_interface.gl_port = prefix.ids.config_widget.ids.graylog_port.text if prefix.ids.config_widget.ids.log_switch.active: logging.basicConfig(filemode='w', filename='logs.txt', level=logging.DEBUG) @@ -153,9 +161,10 @@ def guid_example(self): def build(self): + self.icon = icon_path self.theme_cls.theme_style = "Dark" from UX import MainWidget - Builder.load_file(os.path.join(os.path.split(__file__)[0], 'UX/MainWidget.kv')) + Builder.load_file(os.path.join(root_path, 'UX/MainWidget.kv')) self.root_widget = MainWidget.MainWidget() prefix = self.root_widget.ids.tab_widget prefix.ids.config_widget.ids.clear_known_content.disabled = not os.path.exists('known_content') @@ -230,14 +239,16 @@ def save_settings(self): settings['gl_address'] = prefix.ids.config_widget.ids.graylog_ip.text settings['gl_port'] = prefix.ids.config_widget.ids.graylog_port.text settings['debug_logging'] = prefix.ids.config_widget.ids.log_switch.active - with open('gui_settings.json', 'w') as ofile: + settings_file = os.path.join(root_path, 'gui_settings.json') + with open(settings_file, 'w') as ofile: json.dump(settings, ofile) def load_settings(self): - if not os.path.exists('gui_settings.json'): + settings_file = os.path.join(root_path, 'gui_settings.json') + if not os.path.exists(settings_file): return - with open('gui_settings.json', 'r') as ofile: + with open(settings_file, 'r') as ofile: settings = json.load(ofile) prefix = self.root_widget.ids.tab_widget self.tenant_id = settings['tenant_id'] diff --git a/Source/AzureOMSInterface.py b/Source/Interfaces/AzureOMSInterface.py similarity index 77% rename from Source/AzureOMSInterface.py rename to Source/Interfaces/AzureOMSInterface.py index 1cc0689..62710b3 100644 --- a/Source/AzureOMSInterface.py +++ b/Source/Interfaces/AzureOMSInterface.py @@ -1,3 +1,4 @@ +from . 
import _Interface import requests import requests.adapters import hashlib @@ -11,42 +12,27 @@ import datetime -class AzureOMSInterface: +class AzureOMSInterface(_Interface.Interface): - def __init__(self, workspace_id, shared_key, max_threads=50): + def __init__(self, workspace_id=None, shared_key=None, max_threads=50, **kwargs): """ Interface to send logs to an Azure Log Analytics Workspace. :param workspace_id: Found under "Agent Configuration" blade (str) :param shared_key: Found under "Agent Configuration" blade (str) """ + super().__init__(**kwargs) self.workspace_id = workspace_id self.shared_key = shared_key self.max_threads = max_threads self.threads = collections.deque() - self.monitor_thread = None - self.queue = collections.deque() - self.successfully_sent = 0 - self.unsuccessfully_sent = 0 self.session = requests.Session() adapter = requests.adapters.HTTPAdapter(pool_connections=self.max_threads, pool_maxsize=self.max_threads) self.session.mount('https://', adapter) - def start(self): - - self.monitor_thread = threading.Thread(target=self.monitor_queue, daemon=True) - self.monitor_thread.start() - - def stop(self, gracefully=True): - - if gracefully: - self.queue.append(('stop monitor thread', '')) - else: - self.queue.appendleft(('stop monitor thread', '')) - if self.monitor_thread.is_alive(): - self.monitor_thread.join() - def monitor_queue(self): - + """ + Overloaded for multithreading. + """ while 1: self.threads = [running_thread for running_thread in self.threads if running_thread.is_alive()] if self.queue and len(self.threads) < self.max_threads: @@ -55,17 +41,12 @@ def monitor_queue(self): [running_thread.join() for running_thread in self.threads] return else: - new_thread = threading.Thread(target=self._send_message_to_oms, + new_thread = threading.Thread(target=self._send_message, kwargs={"msg": msg, "content_type": content_type}, daemon=True) new_thread.start() self.threads.append(new_thread) - def send_messages_to_oms(self, *messages, content_type): - - for message in messages: - self.queue.append((message, content_type)) - - def _send_message_to_oms(self, msg, content_type, retries=3): + def _send_message(self, msg, content_type, retries=3): """ Send a single message to a graylog input; the socket must be closed after each individual message, otherwise Graylog will interpret it as a single large message. @@ -77,7 +58,7 @@ def _send_message_to_oms(self, msg, content_type, retries=3): return while True: try: - self.post_data(body=msg_string, log_type=content_type.replace('.', ''), time_generated=time_generated) + self._post_data(body=msg_string, log_type=content_type.replace('.', ''), time_generated=time_generated) except Exception as e: logging.error("Error sending to OMS: {}. Retries left: {}".format(e, retries)) if retries: @@ -91,7 +72,7 @@ def _send_message_to_oms(self, msg, content_type, retries=3): self.successfully_sent += 1 break - def build_signature(self, date, content_length, method, content_type, resource): + def _build_signature(self, date, content_length, method, content_type, resource): """ Returns authorization header which will be used when sending data into Azure Log Analytics. """ @@ -105,7 +86,7 @@ def build_signature(self, date, content_length, method, content_type, resource): authorization = "SharedKey {}:{}".format(self.workspace_id, encoded_hash) return authorization - def post_data(self, body, log_type, time_generated): + def _post_data(self, body, log_type, time_generated): """ Sends payload to Azure Log Analytics Workspace. 
diff --git a/Source/GraylogInterface.py b/Source/Interfaces/GraylogInterface.py
similarity index 60%
rename from Source/GraylogInterface.py
rename to Source/Interfaces/GraylogInterface.py
index 86dcf15..f677f75 100644
--- a/Source/GraylogInterface.py
+++ b/Source/Interfaces/GraylogInterface.py
@@ -1,4 +1,4 @@
-# Standard libs
+from . import _Interface
 from collections import deque
 import logging
 import threading
@@ -7,55 +7,15 @@
 import time


-class GraylogInterface(object):
+class GraylogInterface(_Interface.Interface):

-    def __init__(self, graylog_address, graylog_port):
+    def __init__(self, graylog_address=None, graylog_port=None, **kwargs):

+        super().__init__(**kwargs)
         self.gl_address = graylog_address
         self.gl_port = graylog_port
-        self.monitor_thread = None
-        self.queue = deque()
-        self.successfully_sent = 0
-        self.unsuccessfully_sent = 0

-    def start(self):
-
-        self.monitor_thread = threading.Thread(target=self.monitor_queue, daemon=True)
-        self.monitor_thread.start()
-
-    def stop(self, gracefully=True):
-
-        if gracefully:
-            self.queue.append('stop monitor thread')
-        else:
-            self.queue.appendleft('stop monitor thread')
-        if self.monitor_thread.is_alive():
-            self.monitor_thread.join()
-
-    def monitor_queue(self):
-
-        while 1:
-            if self.queue:
-                msg = self.queue.popleft()
-                if msg == 'stop monitor thread':
-                    return
-                else:
-                    self._send_message_to_graylog(msg=msg)
-
-    def send_messages_to_graylog(self, *messages):
-
-        for message in messages:
-            self.queue.append(message)
-
-    def _connect_to_graylog_input(self):
-        """
-        Return a socket connected to the Graylog input.
-        """
-        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
-        s.connect((self.gl_address, int(self.gl_port)))
-        return s
-
-    def _send_message_to_graylog(self, msg, retries=3):
+    def _send_message(self, msg, retries=3, **kwargs):
         """
         Send a single message to a graylog input; the socket must be closed after each individual message, otherwise
         Graylog will interpret it as a single large message.
@@ -86,3 +46,11 @@
                 logging.error("Error sending message to graylog: {}.".format(e))
         sock.close()
         self.successfully_sent += 1
+
+    def _connect_to_graylog_input(self):
+        """
+        Return a socket connected to the Graylog input.
+        """
+        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+        s.connect((self.gl_address, int(self.gl_port)))
+        return s
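The Graylog interface keeps its send-one-message-per-connection behaviour: each log record is serialised to JSON, written over a fresh TCP connection to the Graylog input, and the socket is closed so Graylog treats it as a single message. A hedged sketch of that pattern follows; the function name and the newline framing are illustrative assumptions, not the class's exact method body.

    # Sketch of the one-connection-per-message pattern used for the Graylog output.
    import json
    import socket

    def send_to_graylog(message, address, port):
        # Newline-terminated JSON; closing the connection also marks the end of the message
        payload = (json.dumps(message) + '\n').encode('utf-8')
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            sock.connect((address, int(port)))
            sock.sendall(payload)
        finally:
            sock.close()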
diff --git a/Source/Interfaces/PRTGInterface.py b/Source/Interfaces/PRTGInterface.py
new file mode 100644
index 0000000..463b488
--- /dev/null
+++ b/Source/Interfaces/PRTGInterface.py
@@ -0,0 +1,45 @@
+from . import _Interface
+import threading
+import collections
+from prtg.sensor.result import CustomSensorResult
+
+
+class PRTGInterface(_Interface.Interface):
+
+    def __init__(self, config=None, **kwargs):
+        """
+        Interface to count audit logs that match the configured channel filters and output the counts to PRTG.
+        """
+        super().__init__(**kwargs)
+        self.config = config
+        self.results = collections.defaultdict(collections.deque)
+
+    def _send_message(self, msg, content_type, **kwargs):
+
+        for channel in self.config['channels']:
+            if content_type not in channel['filters']:
+                continue
+            self._filter_result(msg=msg, content_type=content_type, channel=channel)
+
+    def _filter_result(self, msg, content_type, channel):
+
+        for filter_key, filter_value in channel['filters'][content_type].items():
+            if filter_key not in msg or filter_value.lower() != msg[filter_key].lower():
+                return
+        self.results[channel['name']].append(msg)
+
+    def output(self):
+        try:
+            csr = CustomSensorResult()
+            for channel in self.config['channels']:
+                if channel['name'] not in self.results:
+                    self.results[channel['name']] = collections.deque()
+            for channel_name, messages in self.results.items():
+                csr.add_channel(
+                    name=channel_name, value=len(messages), unit='Count')
+            print(csr.json_result)
+        except Exception as e:
+            csr = CustomSensorResult(text="Python Script execution error")
+            csr.error = "Python Script execution error: %s" % str(e)
+            print(csr.json_result)
+
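The new PRTG interface does not forward raw logs; it counts the logs that match each configured channel's filters and prints the totals in the JSON format consumed by PRTG's Python Script Advanced sensor. A hypothetical usage sketch follows; the import path and the example channel, filter and message values are assumptions for illustration only.

    # Hypothetical usage of PRTGInterface (import path assumed).
    from Interfaces.PRTGInterface import PRTGInterface

    prtg_config = {
        'channels': [
            {'name': 'Failed logins',
             'filters': {'Audit.AzureActiveDirectory': {'Operation': 'UserLoginFailed'}}},
        ]
    }
    interface = PRTGInterface(config=prtg_config)
    interface.start()
    # One collected log record (fields shortened for the example)
    interface.send_messages({'Operation': 'UserLoginFailed', 'UserId': 'user@example.com'},
                            content_type='Audit.AzureActiveDirectory')
    interface.stop(gracefully=True)   # wait for the queue to drain
    interface.output()                # prints the PRTG sensor JSON to stdout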
diff --git a/Source/Interfaces/_Interface.py b/Source/Interfaces/_Interface.py
new file mode 100644
index 0000000..b8d0909
--- /dev/null
+++ b/Source/Interfaces/_Interface.py
@@ -0,0 +1,60 @@
+from collections import deque
+import threading
+
+
+class Interface(object):
+
+    def __init__(self, **kwargs):
+
+        self.monitor_thread = None
+        self.queue = deque()
+        self.successfully_sent = 0
+        self.unsuccessfully_sent = 0
+
+    def start(self):
+        """
+        Start monitoring for messages to dispatch.
+        """
+        self.monitor_thread = threading.Thread(target=self.monitor_queue, daemon=True)
+        self.monitor_thread.start()
+
+    def stop(self, gracefully=True):
+        """
+        Stop the interface gracefully or forcefully.
+        :param gracefully: wait for all messages to be dispatched (Bool)
+        """
+        if gracefully:
+            self.queue.append(('stop monitor thread', ''))  # sentinel is a (msg, content_type) tuple, matching monitor_queue
+        else:
+            self.queue.appendleft(('stop monitor thread', ''))
+        if self.monitor_thread.is_alive():
+            self.monitor_thread.join()
+
+    def monitor_queue(self):
+        """
+        Check the message queue and dispatch them when found.
+        """
+        while 1:
+            if self.queue:
+                msg, content_type = self.queue.popleft()
+                if msg == 'stop monitor thread':
+                    return
+                else:
+                    self._send_message(msg=msg, content_type=content_type)
+
+    def send_messages(self, *messages, content_type):
+        """
+        Send message(s) to this interface. They will be handled asynchronously.
+        :param messages: list of dict
+        :param content_type: str
+        """
+        for message in messages:
+            self.queue.append((message, content_type))
+
+    def _send_message(self, msg, content_type, **kwargs):
+        """
+        Overload and implement actual sending of the message to the interface.
+        :param msg: dict
+        :param content_type: str
+        """
+        pass
diff --git a/Source/icon.ico b/Source/icon.ico
new file mode 100644
index 0000000..7fadbf9
Binary files /dev/null and b/Source/icon.ico differ
diff --git a/requirements.txt b/Source/requirements.txt
similarity index 64%
rename from requirements.txt
rename to Source/requirements.txt
index afc2373..67dee77 100644
Binary files a/requirements.txt and b/Source/requirements.txt differ
diff --git a/Windows/GUI - Office Audit Log Collector.exe b/Windows/GUI-OfficeAuditLogCollector-v1.1.exe
similarity index 80%
rename from Windows/GUI - Office Audit Log Collector.exe
rename to Windows/GUI-OfficeAuditLogCollector-v1.1.exe
index bffa194..6273b11 100644
Binary files a/Windows/GUI - Office Audit Log Collector.exe and b/Windows/GUI-OfficeAuditLogCollector-v1.1.exe differ
diff --git a/Windows/Office Audit Log Collector.exe b/Windows/Office Audit Log Collector.exe
deleted file mode 100644
index c752b96..0000000
Binary files a/Windows/Office Audit Log Collector.exe and /dev/null differ
diff --git a/Windows/Office Audit Log Subscriber.exe b/Windows/OfficeAuditLogCollector-V1.1.exe
similarity index 95%
rename from Windows/Office Audit Log Subscriber.exe
rename to Windows/OfficeAuditLogCollector-V1.1.exe
index 7decd73..552d6fa 100644
Binary files a/Windows/Office Audit Log Subscriber.exe and b/Windows/OfficeAuditLogCollector-V1.1.exe differ
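All outputs now share the _Interface.Interface base class introduced above, which owns the message queue and the monitor thread; a concrete output only overrides _send_message. A minimal hypothetical subclass to illustrate that contract (the class name, import path and print output are invented for the example):

    # Hypothetical subclass of the new Interface base class, showing the contract:
    # override _send_message, inherit start/stop/send_messages from the base class.
    from Interfaces import _Interface  # import path assumed

    class PrintInterface(_Interface.Interface):
        """Toy output for illustration: prints every log it receives."""

        def _send_message(self, msg, content_type, **kwargs):
            print('[{}] {}'.format(content_type, msg))
            self.successfully_sent += 1

    interface = PrintInterface()
    interface.start()
    interface.send_messages({'Operation': 'FileDeleted'}, content_type='Audit.SharePoint')
    interface.stop(gracefully=True)   # blocks until the queued messages have been handled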