diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..cc97a06 --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +Release/Linux/OfficeAuditLogCollector filter=lfs diff=lfs merge=lfs -text diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml new file mode 100644 index 0000000..1c23d42 --- /dev/null +++ b/.github/workflows/release.yaml @@ -0,0 +1,48 @@ +# +name: Create and publish a Docker image + +# Configures this workflow to run every time a change is pushed to the branch called `release`. +on: + push: + branches: ['release'] + +# Defines two custom environment variables for the workflow. These are used for the Container registry domain, and a name for the Docker image that this workflow builds. +env: + REGISTRY: ghcr.io + IMAGE_NAME: ${{ github.repository }} + +# There is a single job in this workflow. It's configured to run on the latest available version of Ubuntu. +jobs: + build-and-push-image: + runs-on: ubuntu-latest + # Sets the permissions granted to the `GITHUB_TOKEN` for the actions in this job. + permissions: + contents: read + packages: write + # + steps: + - name: Checkout repository + uses: actions/checkout@v4 + # Uses the `docker/login-action` action to log in to the Container registry registry using the account and password that will publish the packages. Once published, the packages are scoped to the account defined here. + - name: Log in to the Container registry + uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + # This step uses [docker/metadata-action](https://github.com/docker/metadata-action#about) to extract tags and labels that will be applied to the specified image. The `id` "meta" allows the output of this step to be referenced in a subsequent step. The `images` value provides the base name for the tags and labels. + - name: Extract metadata (tags, labels) for Docker + id: meta + uses: docker/metadata-action@9ec57ed1fcdbf14dcef7dfbe97b2010124a938b7 + with: + images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} + # This step uses the `docker/build-push-action` action to build the image, based on your repository's `Dockerfile`. If the build succeeds, it pushes the image to GitHub Packages. + # It uses the `context` parameter to define the build's context as the set of files located in the specified path. For more information, see "[Usage](https://github.com/docker/build-push-action#usage)" in the README of the `docker/build-push-action` repository. + # It uses the `tags` and `labels` parameters to tag and label the image with the output from the "meta" step. + - name: Build and push Docker image + uses: docker/build-push-action@f2a1d5e99d037542a71f64918e516c093c6f3fc4 + with: + context: ./Release + push: true + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} diff --git a/Cargo.lock b/Cargo.lock index fbf6bde..7abdd5a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,26 +2,6 @@ # It is not intended for manual editing. 
version = 3 -[[package]] -name = "OfficeAuditLogCollector" -version = "0.1.0" -dependencies = [ - "chrono", - "clap", - "csv", - "futures", - "log", - "poston", - "reqwest", - "serde", - "serde_derive", - "serde_json", - "serde_yaml", - "simple_logger", - "tokio", - "tokio-stream", -] - [[package]] name = "addr2line" version = "0.21.0" @@ -786,6 +766,26 @@ dependencies = [ "memchr", ] +[[package]] +name = "office_audit_log_collector" +version = "2.5.0" +dependencies = [ + "chrono", + "clap", + "csv", + "futures", + "log", + "poston", + "reqwest", + "serde", + "serde_derive", + "serde_json", + "serde_yaml", + "simple_logger", + "tokio", + "tokio-stream", +] + [[package]] name = "once_cell" version = "1.19.0" diff --git a/Cargo.toml b/Cargo.toml index bf98508..10c634a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] -name = "OfficeAuditLogCollector" -version = "0.1.0" +name = "office_audit_log_collector" +version = "2.5.0" edition = "2021" diff --git a/ConfigExamples/azureBlob.yaml b/ConfigExamples/azureBlob.yaml deleted file mode 100644 index a28e657..0000000 --- a/ConfigExamples/azureBlob.yaml +++ /dev/null @@ -1,15 +0,0 @@ -collect: - contentTypes: - Audit.General: True - Audit.AzureActiveDirectory: True - Audit.Exchange: True - Audit.SharePoint: True - DLP.All: True -output: - azureBlob: - enabled: True - containerName: audit-logs - blobName: AuditLog - tempPath: 'output' - separateByContentType: True - separator: ';' \ No newline at end of file diff --git a/ConfigExamples/azureLogAnalytics.yaml b/ConfigExamples/azureLogAnalytics.yaml deleted file mode 100644 index b5dba14..0000000 --- a/ConfigExamples/azureLogAnalytics.yaml +++ /dev/null @@ -1,12 +0,0 @@ -collect: # Settings determining which audit logs to collect and how to do it - contentTypes: - Audit.General: True - Audit.AzureActiveDirectory: True - Audit.Exchange: True - Audit.SharePoint: True - DLP.All: True -output: - azureLogAnalytics: - enabled: True - workspaceId: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx - sharedKey: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx \ No newline at end of file diff --git a/ConfigExamples/azureTable.yaml b/ConfigExamples/azureTable.yaml deleted file mode 100644 index 20f75dd..0000000 --- a/ConfigExamples/azureTable.yaml +++ /dev/null @@ -1,11 +0,0 @@ -collect: # Settings determining which audit logs to collect and how to do it - contentTypes: - Audit.General: True - Audit.AzureActiveDirectory: True - Audit.Exchange: True - Audit.SharePoint: True - DLP.All: True -output: - azureTable: - enabled: True - tableName: AuditLogs \ No newline at end of file diff --git a/ConfigExamples/fullConfig.yaml b/ConfigExamples/fullConfig.yaml deleted file mode 100644 index 75939cd..0000000 --- a/ConfigExamples/fullConfig.yaml +++ /dev/null @@ -1,64 +0,0 @@ -log: # Log settings. Debug will severely decrease performance - path: 'collector.log' - debug: False -collect: # Settings determining which audit logs to collect and how to do it - workingDir: ./ # Directory to save cache files in (known_logs, known_content, last_run). Default is dir where executable is located - contentTypes: - Audit.General: True - Audit.AzureActiveDirectory: True - Audit.Exchange: True - Audit.SharePoint: True - DLP.All: True - cacheSize: 500000 # Amount of logs to cache/batch until outputting, larger=faster but eats more memory - maxThreads: 50 # Maximum number of simultaneous threads retrieving logs - globalTimeout: 0 # Number of minutes before the process is forced to exit if still running (0 = no timeout). If you run e.g. 
every hour you could set this to 59, ensuring there will only be 1 active process. - retries: 3 # Times to retry retrieving a content blob if it fails - retryCooldown: 3 # Seconds to wait before retrying retrieving a content blob - autoSubscribe: True # Automatically subscribe to collected content types. Never unsubscribes from anything. - skipKnownLogs: True # Remember retrieved log ID's, don't collect them twice - hoursToCollect: 24 # Look back this many hours for audit logs (can be overwritten by resume) - filter: # Only logs that match ALL filters for a content type are collected. Leave empty to collect all - Audit.General: - Audit.AzureActiveDirectory: - Audit.Exchange: - Audit.SharePoint: - DLP.All: -output: - file: # CSV output - enabled: False - separateByContentType: True # Creates a separate CSV file for each content type, using file name from 'path' as a prefix - path: 'output.csv' - separator: ';' - azureLogAnalytics: - enabled: False - workspaceId: - sharedKey: - maxThreads: 50 # Maximum simultaneous threads sending logs to workspace - azureTable: # Provide connection string to executable at runtime with --table-string - enabled: False - tableName: AuditLogs # Name of the table inside the storage account - maxThreads: 10 # Maximum simultaneous threads sending logs to Table - azureBlob: # Write CSV to a blob container. Provide connection string to executable at runtime with --blob-string - enabled: False - containerName: AuditLogs # Name of the container inside storage account - blobName: AuditLog # When separatedByContentType is true, this is used as file prefix and becomes e.g. AuditLog_AuditExchange.csv - tempPath: './output' - separateByContentType: True - separator: ';' - cacheSize: 500000 # Amount of logs to cache until each CSV commit, larger=faster but eats more memory - sql: # Provide connection string to executable at runtime with --sql-string - enabled: False - cacheSize: 500000 # Amount of logs to cache until each SQL commit, larger=faster but eats more memory - chunkSize: 2000 # Amount of rows to write simultaneously to SQL, in most cases just set it as high as your DB allows. COUNT errors = too high - graylog: - enabled: False - address: - port: - prtg: - enabled: False - channels: - fluentd: - enabled: True - tenantName: - address: - port: diff --git a/ConfigExamples/prtg.yaml b/ConfigExamples/prtg.yaml deleted file mode 100644 index 4f5c0ed..0000000 --- a/ConfigExamples/prtg.yaml +++ /dev/null @@ -1,26 +0,0 @@ -collect: - contentTypes: - Audit.General: True - Audit.AzureActiveDirectory: True - Audit.SharePoint: True - skipKnownLogs: False # Take all logs each time to count the number of active filter hits each interval - resume: False # Take all logs each time to count the number of active filter hits each interval - hoursToCollect: 1 # Period over which to alert, e.g. failed AAD logins over the last hour -# The PRTG output defines channels which have filters associated to them. The output of the channel will be -# the number of hits on the filter. E.g. filter for failed AAD logins on a "Failed AAD logins" channel. 
-output: - prtg: - enabled: True - channels: - - name: Deleted Sharepoint files - filters: - Audit.SharePoint: - Operation: FileDeleted - - name: Failed Azure AD logins - filters: - Audit.AzureActiveDirectory: - Operation: UserLoginFailed - - name: Spoof attempts prevented - filters: - Audit.General: - Policy: Spoof \ No newline at end of file diff --git a/ConfigExamples/sql.yaml b/ConfigExamples/sql.yaml deleted file mode 100644 index fdb5603..0000000 --- a/ConfigExamples/sql.yaml +++ /dev/null @@ -1,12 +0,0 @@ -collect: - contentTypes: - Audit.General: True - Audit.AzureActiveDirectory: True - Audit.Exchange: True - Audit.SharePoint: True - DLP.All: True -output: - sql: - enabled: True - cacheSize: 500000 - chunkSize: 2000 \ No newline at end of file diff --git a/README.md b/README.md index 8a4a87d..4a62a30 100644 --- a/README.md +++ b/README.md @@ -1,16 +1,34 @@ # Announcements: -- I was asked to write an article for the Graylog community, giving a more detailed look at how to use -this tool. If this might be useful to you, you can find it -[here](https://community.graylog.org/t/collecting-office365-azuread-audit-logs-using-office-audit-collector/23925). - -- Also, shoutout to [Jetbrains](https://www.jetbrains.com/all/) -for sponsoring an all product license for their IDEs for this open source -project. Much appreciated. -- The 'resume' parameter has been deprecated. Sometimes logs are published to the API with a delay -and this causes issues with the 'resume' parameter. In hindsight this parameter was a mistake. It now -generates a warning when used, but in the future it will be removed. If you are using it, please consider -setting it to 'false'. If you were using it to prevent duplicate logs, set 'skipKnownLogs' to true instead. +Due to a busy real-life schedule, I have noticed I am not able to support this tool as much as I want to. +This has resulted in issues staying open for too long. In order to remedy this, some radical changes were needed: + + +#### Full Rust rewrite + +The engine was already rewritten in Rust for performance. Looking at the issues, however, most crashes came from Python's +loose typing. Building in PyInstaller was also a drag due to some libs not playing nice with it. Now that the entire tool has been +rewritten in Rust, I'm hoping for more stability. + +#### Support only what is necessary + +Many interfaces and config options have become superfluous. For example, most Azure interfaces are now useless, as +Azure Sentinel supports direct retrieval of audit logs. By including only the features that will actually be used in the +rewrite, I'm hoping I'll be able to maintain the smaller codebase in my limited free time. The new interfaces are: +- CSV file +- Graylog +- Fluentd + +If you were using an interface that was dropped, keep using the previous version and raise an issue asking for the +interface to be included. I don't mind writing an interface for one person, I only mind writing it for no one. + +#### Add container releases + +While binaries will still be available, the primary method of release should be containers. This will hopefully +reduce the number of questions people have regarding how to run the tool, as only the container and a config file will +be necessary. + + # Office365 audit log collector @@ -18,23 +36,16 @@ Collect/retrieve Office365, Azure and DLP audit logs, optionally filter them, th (see full list below). Onboarding is easy and takes only a few minutes (see 'Onboarding' section). There are Windows and Linux executables.
Configuration is easy with a YAML config file (see the 'ConfigExamples' folder for reference). -If you have any issues or questions, or requests for additional interfaces, feel free to create an issue in this repo. -- The following Audit logs can be extracted: +If you have any issues or questions, or requests for additional interfaces, feel free to create an issue in this repo. - The following Audit logs can be extracted: - Audit.General - Audit.AzureActiveDirectory - Audit.Exchange - Audit.SharePoint - DLP.All - The following outputs are supported: - - Azure Analytics Workspace (OMS) - - Azure Storage Table - - Azure Storage Blob - - PRTG Network Monitor - - ( Azure ) SQL server - Graylog (or any other source that accepts a simple socket connection) - Fluentd - CSV Local file - - Power BI (indirectly through SQL, CSV, Azure Tables or Azure Blob) Feel free to contribute other outputs if you happen to build any. Also open to any other useful pull requests! See the following link for more info on the management APIs: https://msdn.microsoft.com/en-us/office-365/office-365-management-activity-api-reference. @@ -44,11 +55,10 @@ See the following link for more info on the management APIs: https://msdn.micros - Ad-lib log retrieval; - Scheduling regular execution to retrieve the full audit trail - Output to Graylog/fluentd for full audit trails in SIEM -- Output to PRTG for alerts on audit logs -- Output to (Azure) SQL / CSV for Power BI - Etc. ## Latest changes: +- Full rust rewrite - Deprecated 'resume' parameter. - Added native timestamp field to logs for graylog output - Added fluentd support (thanks @owentl) @@ -92,69 +102,20 @@ See the following link for more info on the management APIs: https://msdn.micros - Check 'ActivityFeed.Read' - Check 'ActivityFeed.ReadDlp' - Hit 'Add permissions' -- Subscribe to audit log feeds of your choice - - Set 'autoSubscribe: True' in YAML config file to automate this. - - OR Use the '--interactive-subscriber' parameter when executing the collector to manually subscribe to the audit API's of your choice - You can now run the collector and retrieve logs. ### Running the collector: -You can schedule to run the executable with CRON or Task Scheduler. Alternatively, you can use the "schedule" option in -the YAML config to run the executable once and have it schedule itself (see ConfigExamples/schedule.yaml). - +You can schedule to run the executable with CRON or Task Scheduler. To run the command-line executable use the following syntax: -OfficeAuditLogCollector(.exe) %tenant_id% %client_key% %secret_key% --config %path/to/config.yaml% +OfficeAuditLogCollector(.exe) --tenant-id %tenant_id% --client-id %client_key% --secret-key %secret_key% --config %path/to/config.yaml% To create a config file you can start with the 'fullConfig.yaml' from the ConfigExamples folder. This has all the possible options and some explanatory comments. Cross-reference with a config example using the output(s) of your choice, and you -should be set. - -### (optional) Creating an Azure Log Analytics Workspace (OMS): - -If you are running this script to get audit events in an Azure Analytics Workspace you will need a Workspace ID and a shared key. -- Create a workspace from "Create resource" in Azure (no configuration required); -- Get the ID and key from "Agent management"; -- You do not need to prepare any tables or other settings. 
- -### (optional) Creating an Azure Table / Blob account: - -If you are running this script to get audit events in an Azure Table and/or Blob you will need a storage account and connection string: -- Create a storage account from "Create resource" in Azure (no special configuration required); -- Get the connection string from 'Access keys' -- You do not need to prepare any tables or blob containers as they are created in the storage account if they do not exist. - -### (optional) Creating a PRTG sensor - -To run with PRTG you must create a sensor: -- Copy the OfficeAuditLogCollector.exe executable to the "\Custom Sensors\EXE" sub folder of your PRTG installation -- Create a device in PRTG with any host name (e.g. "Office Audit Logs") -- Create a 'EXE/Script Advanced Sensor' on that device and choose the executable you just copied -- Enter parameters, e.g.: "*tenant_id* *client_key* *secret_key* --config *full/path/to/config.yaml*" -(use full path, because PRTG will execute the script from a different working directory) - Copy the prtg.config from ConfigExamples and modify at least the channel names and filters for your needs. -- Set the timeout of the script to something generous that suits the amount of logs you will retrieve. -Probably at least 300 seconds. Run the script manually first to check how long it takes. -- Match the interval of the sensor to the amount of hours of logs to retrieve. If your interval is 1 hour, hoursToCollect -in the config file should also be set to one hour. - -### (optional) Using ( Azure ) SQL - -If you are running this script to get audit events in an SQL database you will need an ODBC driver and a connection string -- The collector uses PYODBC, which needs an ODBC driver, examples on how to install this: - - On windows: https://docs.microsoft.com/en-us/sql/connect/odbc/download-odbc-driver-for-sql-server?view=sql-server-ver15 - - On Linux: https://docs.microsoft.com/en-us/sql/connect/odbc/linux-mac/installing-the-microsoft-odbc-driver-for-sql-server?view=sql-server-ver15#ubuntu17 -- Connection string might look like this: "Driver={ODBC Driver 17 for SQL Server};Server=tcp:mydatabase.com,1433;Database=mydatabase;Uid=myuser;Pwd=mypassword;Encrypt -=yes;TrustServerCertificate=no;Connection Timeout=30;" -- Use SQL example config and pass --sql-string parameter when running the collector with your connection string - - - -### (optional) Creating a Graylog input - -If you are running this script to get audit events in Graylog you will need to create a Graylog input. -- Create a 'raw/plaintext TCP' input; -- Enter the IP and port you want to receive the logs on (you can use these in the script); -- All other settings can be left default. +should be set. Remember to remove (or comment out) all the outputs you do not intend to use. +### Setting up the collector for Graylog: +I wrote a full tutorial on the Graylog blog. You can find it +[here](https://community.graylog.org/t/collecting-office365-azuread-audit-logs-using-office-audit-collector/23925).
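For reference, a minimal sketch of running the tool via the published container instead of the bare executable. The image path and tag are assumptions (they depend on the repository name and on the tags the metadata step generates), and the config path follows the Release/ConfigExamples/CsvOutput.yaml example, which uses /app as its working directory:

docker run --rm -v /path/to/config-dir:/app ghcr.io/<owner>/<repo>:release --tenant-id <tenant_id> --client-id <client_key> --secret-key <secret_key> --config /app/CsvOutput.yaml

Because the mounted directory doubles as the collector's workingDir, the known_logs/known_content cache files and any CSV output would persist between runs.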
\ No newline at end of file diff --git a/ConfigExamples/CsvOutput.yaml b/Release/ConfigExamples/CsvOutput.yaml similarity index 83% rename from ConfigExamples/CsvOutput.yaml rename to Release/ConfigExamples/CsvOutput.yaml index 2d7a480..bd22c9b 100644 --- a/ConfigExamples/CsvOutput.yaml +++ b/Release/ConfigExamples/CsvOutput.yaml @@ -1,5 +1,6 @@ collect: skipKnownLogs: True + workingDir: /app contentTypes: Audit.General: True Audit.AzureActiveDirectory: True @@ -8,6 +9,6 @@ collect: DLP.All: True output: file: - path: 'output.csv' + path: '/app/output.csv' separateByContentType: True separator: ';' \ No newline at end of file diff --git a/ConfigExamples/filter.yaml b/Release/ConfigExamples/filter.yaml similarity index 100% rename from ConfigExamples/filter.yaml rename to Release/ConfigExamples/filter.yaml diff --git a/ConfigExamples/fluentd.yaml b/Release/ConfigExamples/fluentd.yaml similarity index 100% rename from ConfigExamples/fluentd.yaml rename to Release/ConfigExamples/fluentd.yaml diff --git a/Release/ConfigExamples/fullConfig.yaml b/Release/ConfigExamples/fullConfig.yaml new file mode 100644 index 0000000..071ae44 --- /dev/null +++ b/Release/ConfigExamples/fullConfig.yaml @@ -0,0 +1,38 @@ +log: # Log settings. Debug will severely decrease performance + path: 'collector.log' + debug: False +collect: # Settings determining which audit logs to collect and how to do it + workingDir: ./ # Directory to save cache files in (known_logs, known_content, last_run). Default is dir where executable is located + contentTypes: + Audit.General: True + Audit.AzureActiveDirectory: True + Audit.Exchange: True + Audit.SharePoint: True + DLP.All: True + cacheSize: 500000 # Amount of logs to cache/batch until outputting, larger=faster but eats more memory + maxThreads: 50 # Maximum number of simultaneous threads retrieving logs + globalTimeout: 1 # Number of minutes before the process is forced to exit if still running (0 = no timeout). If you run e.g. every hour you could set this to 59, ensuring there will only be 1 active process. + retries: 3 # Times to retry retrieving a content blob if it fails + skipKnownLogs: True # Remember retrieved log blobs, don't collect them twice + hoursToCollect: 24 # Look back this many hours for audit logs (max supported by Office API is 168) + filter: # Only logs that match ALL filters for a content type are collected. 
Leave empty to collect all + Audit.General: + Audit.AzureActiveDirectory: + Audit.Exchange: + Audit.SharePoint: + DLP.All: +output: # Make sure to remove or comment out all outputs you do not intend to use + file: # CSV output + enabled: False + separateByContentType: True # Creates a separate CSV file for each content type, using file name from 'path' as a prefix + path: 'output.csv' + separator: ';' +# graylog: +# enabled: False +# address: localhost +# port: 5555 +# fluentd: +# enabled: True +# tenantName: myorg +# address: localhost +# port: 5555 diff --git a/ConfigExamples/graylog.yaml b/Release/ConfigExamples/graylog.yaml similarity index 75% rename from ConfigExamples/graylog.yaml rename to Release/ConfigExamples/graylog.yaml index de131b5..6b26807 100644 --- a/ConfigExamples/graylog.yaml +++ b/Release/ConfigExamples/graylog.yaml @@ -7,6 +7,5 @@ collect: DLP.All: True output: graylog: - enabled: True - address: 172.16.1.1 - port: 5000 \ No newline at end of file + address: localhost + port: 5555 \ No newline at end of file diff --git a/Release/Dockerfile b/Release/Dockerfile new file mode 100644 index 0000000..28f27ce --- /dev/null +++ b/Release/Dockerfile @@ -0,0 +1,15 @@ +FROM debian:stable-slim + +COPY Linux/OfficeAuditLogCollector / + +RUN apt-get update && apt-get install ca-certificates -y + +WORKDIR /app + +RUN \ + chmod +x /OfficeAuditLogCollector && \ + chown -R 1001:1001 /app /OfficeAuditLogCollector + +USER 1001 +CMD "ls /app -la" +ENTRYPOINT ["/OfficeAuditLogCollector"] diff --git a/Release/Linux/OfficeAuditLogCollector b/Release/Linux/OfficeAuditLogCollector new file mode 100755 index 0000000..20ade33 Binary files /dev/null and b/Release/Linux/OfficeAuditLogCollector differ diff --git a/Source/ApiConnection.py b/Source/ApiConnection.py deleted file mode 100644 index b86e6af..0000000 --- a/Source/ApiConnection.py +++ /dev/null @@ -1,76 +0,0 @@ -import sys -import requests -import logging -import urllib.parse - - -class ApiConnection(object): - - def __init__(self, tenant_id=None, client_key=None, secret_key=None, publisher_id=None, **kwargs): - """ - Object that creates the authorization headers for- and sends API requests to the Microsoft Office APIs'. - Taken from a Microsoft sample script that I cannot find the original of to reference. - :param tenant_id: tenant ID of of Office/Azure subscription - :param client_key: key (ID) of the application created in Azure to allow API access - :param secret_key: key (secret) generated by the application created in Azure - :param publisher_id: random GUID for API throttling; if none is given you are using public API limits and will probably be throttled (str) - """ - self.tenant_id = tenant_id - self.client_key = client_key - self.secret_key = secret_key - self.publisher_id = publisher_id - self._headers = None - - @property - def headers(self): - """ - Generate headers once then return from cache. - :return: authorization headers to use in https requests - """ - if not self._headers: - self._headers = self.login() - return self._headers - - def login(self): - """ - Login to get access token and cache it to make API requests with. 
- :return: authorization headers (dict) - """ - headers = {'Content-Type': 'application/x-www-form-urlencoded'} - auth_url = 'https://login.microsoftonline.com/{0}/oauth2/token'.format(self.tenant_id) - resource = 'https://manage.office.com' - data = 'grant_type=client_credentials&client_id={0}&client_secret={1}&resource={2}'.format( - self.client_key, urllib.parse.quote(self.secret_key), resource) - r = requests.post(auth_url, headers=headers, data=data, verify=True) - resp = r.json() - if not self.publisher_id: - self.publisher_id = self.tenant_id - try: - headers['Authorization'] = 'bearer ' + resp['access_token'] - logging.log(level=logging.DEBUG, msg='Logged in') - return headers - except KeyError as e: - logging.log(level=logging.ERROR, msg='Error logging in: "{0}"'.format(e)) - sys.exit(1) - - def make_api_request(self, url, append_url=True, get=True): - """ - Make an API requests by appending the resource to the base URL. E.g. url='subscriptions/list'. - Disable append_url to make the call to the literal passed URL. - :param url: string - :param append_url: bool - :return: requests response - """ - if append_url: - url = 'https://manage.office.com/api/v1.0/{0}/activity/feed/{1}'.format(self.tenant_id, url) - if self.publisher_id: - url = '{0}{1}PublisherIdentifier={2}'.format( - url, '?' if '?' not in url.split('/')[-1] else '&', self.publisher_id if self.publisher_id else '') - logging.log(level=logging.DEBUG, msg='Making API request using URL: "{0}"'.format(url)) - if get: - status = requests.get(url, headers=self.headers, verify=True, timeout=120) - else: - status = requests.post(url, headers=self.headers, verify=True, timeout=120) - return status - - diff --git a/Source/AuditLogCollector.py b/Source/AuditLogCollector.py deleted file mode 100644 index 7c653fa..0000000 --- a/Source/AuditLogCollector.py +++ /dev/null @@ -1,507 +0,0 @@ -from Interfaces import AzureOMSInterface, SqlInterface, GraylogInterface, PRTGInterface, FileInterface, \ - AzureTableInterface, AzureBlobInterface, FluentdInterface -import alc # Rust based log collector Engine -import AuditLogSubscriber -import ApiConnection -import os -import sys -import yaml -import time -import json -import signal -import logging -import datetime -import argparse -import collections -import threading - -# Azure logger is very noisy on INFO -az_logger = logging.getLogger("azure.core.pipeline.policies.http_logging_policy") -az_logger.setLevel(logging.WARNING) - - -class AuditLogCollector(ApiConnection.ApiConnection): - - def __init__(self, config_path, **kwargs): - """ - Object that can retrieve all available content blobs for a list of content types and then retrieve those logs - and send them to a variety of outputs. 
- """ - super().__init__(**kwargs) - self.config = Config(path=config_path) - self.interfaces = {} - self._register_interfaces(**kwargs) - self._init_logging() - - self._last_run_times = {} - self._known_content = {} - self._known_logs = {} - - self._remaining_content_types = collections.deque() - self.blobs_to_collect = collections.defaultdict(collections.deque) - self.monitor_thread = threading.Thread() - self.retrieve_available_content_threads = collections.deque() - self.retrieve_content_threads = collections.deque() - self.run_started = None - self.logs_retrieved = 0 - self.errors_retrieving = 0 - self.retries = 0 - - self.working_dir = self.config['collect', 'workingDir'] or "./" - if not os.path.exists(self.working_dir): - os.makedirs(self.working_dir, exist_ok=True) - - def force_stop(self, *args): - - logging.info("Got a SIGINT, stopping..") - self.monitor_thread.join(timeout=10) - sys.exit(0) - - def run(self): - - if not self.config['collect', 'schedule']: - self.run_once() - else: - self.run_scheduled() - - def run_once(self): - """ - Check available content and retrieve it, then exit. - """ - self._prepare_to_run() - logging.log(level=logging.INFO, msg='Starting run @ {}. Content: {}.'.format( - datetime.datetime.now(), self.config['collect', 'contentTypes'])) - if not self.config['collect', 'rustEngine'] is False: - self._start_interfaces() - self.receive_results_from_rust_engine() - self._stop_interfaces(force=False) - self._finish_run() - - def receive_results_from_rust_engine(self): - - runs = self._get_needed_runs(content_types=self.config['collect', 'contentTypes'].copy()) - engine = alc.RustEngine(self.tenant_id, self.client_key, self.secret_key, self.publisher_id or self.tenant_id, - self.config['collect', 'contentTypes'], runs, - self.config['collect', 'maxThreads'] or 50, - self.config['collect', 'retries'] or 3) - engine.run_once() - last_received = datetime.datetime.now() - timeout = self.config['collect', 'globalTimeout'] - while True: - try: - if timeout and datetime.datetime.now() - self.run_started >= datetime.timedelta(minutes=timeout): - logging.error("Global timeout reached, killing process.") - sys.exit(-1) - result = engine.get_result() - except ValueError: # RustEngine throws this error when no logs are in the results recv queue - now = datetime.datetime.now() - if now - last_received > datetime.timedelta(seconds=60): - logging.error("Timed out waiting for results from engine") - break - last_received = now - except EOFError: # RustEngine throws this error when all content has been retrieved - logging.info("Rust engine finished receiving all content") - break - else: - content_json, content_id, content_expiration, content_type = result - self._handle_retrieved_content(content_id=content_id, content_expiration=content_expiration, - content_type=content_type, results=json.loads(content_json)) - self.logs_retrieved += 1 - _, _, self.retries, self.errors_retrieving = engine.stop() - - def run_scheduled(self): - """ - Run according to the schedule set in the config file. Collector will not exit unless manually stopped. 
- """ - if not self.run_started: # Run immediately initially - target_time = datetime.datetime.now() - else: - days, hours, minutes = self.config['collect', 'schedule'] - target_time = self.run_started + datetime.timedelta(days=days, hours=hours, minutes=minutes) - if datetime.datetime.now() > target_time: - logging.warning("Warning: last run took longer than the scheduled interval.") - logging.info("Next run is scheduled for: {}.".format(target_time)) - while True: - if datetime.datetime.now() > target_time: - self.run_once() - self.run_scheduled() - else: - time.sleep(1) - - def _register_interfaces(self, **kwargs): - - for interface in [FileInterface.FileInterface, AzureTableInterface.AzureTableInterface, - AzureBlobInterface.AzureBlobInterface, AzureOMSInterface.AzureOMSInterface, - SqlInterface.SqlInterface, GraylogInterface.GraylogInterface, PRTGInterface.PRTGInterface, - FluentdInterface.FluentdInterface]: - self.interfaces[interface] = interface(collector=self, **kwargs) - - @property - def _all_enabled_interfaces(self): - - return [interface for interface in self.interfaces.values() if interface.enabled] - - def _init_logging(self): - """ - Start logging to file and console. If PRTG output is enabled do not log to console, as this will interfere with - the sensor result. - """ - logger = logging.getLogger() - file_handler = logging.FileHandler(self.config['log', 'path'].strip("'") if self.config['log', 'path'] - else 'collector.log', mode='w') - if not self.interfaces[PRTGInterface.PRTGInterface].enabled: - stream_handler = logging.StreamHandler(sys.stdout) - logger.addHandler(stream_handler) - logger.addHandler(file_handler) - logger.setLevel(logging.INFO if not self.config['log', 'debug'] else logging.DEBUG) - - def _prepare_to_run(self): - """ - Make sure that self.run_once can be called multiple times by resetting to an initial state. - """ - self.config.load_config() - self._remaining_content_types = self.config['collect', 'contentTypes'] or collections.deque() - if self.config['collect', 'autoSubscribe']: - self._auto_subscribe() - if self.config['collect', 'resume']: - self._get_last_run_times() - if self.config['collect', 'skipKnownLogs']: - self._known_content.clear() - self._known_logs.clear() - self._clean_known_content() - self._clean_known_logs() - self.logs_retrieved = 0 - for interface in self._all_enabled_interfaces: - interface.reset() - self.run_started = datetime.datetime.now() - - def _finish_run(self): - """ - Save relevant information and output PRTG result if the interface is enabled. The other interfaces output - while collecting. - """ - if self.config['collect', 'skipKnownLogs']: - self._add_known_log() - self._add_known_content() - if self.config['collect', 'resume'] and self._last_run_times: - with open(os.path.join(self.working_dir, 'last_run_times'), 'w') as ofile: - json.dump(fp=ofile, obj=self._last_run_times) - self._log_statistics() - - def _log_statistics(self): - """ - Write run statistics to log file / console. - """ - logging.info("Finished. Total logs retrieved: {}. Total retries: {}. Total logs with errors: {}. Run time: {}." 
- .format(self.logs_retrieved, self.retries, self.errors_retrieving, datetime.datetime.now() - self.run_started)) - for interface in self._all_enabled_interfaces: - logging.info("{} reports: {} successfully sent, {} errors".format( - interface.__class__.__name__, interface.successfully_sent, interface.unsuccessfully_sent)) - - def _get_last_run_times(self): - """ - Load last_run_times file and interpret the datetime for each content type. - """ - if os.path.exists(os.path.join(self.working_dir, 'last_run_times')): - try: - with open(os.path.join(self.working_dir, 'last_run_times'), 'r') as ofile: - self._last_run_times = json.load(ofile) - except Exception as e: - logging.error("Could not read last run times file: {}.".format(e)) - for content_type, last_run_time in self._last_run_times.items(): - try: - self._last_run_times[content_type] = datetime.datetime.strptime(last_run_time, "%Y-%m-%dT%H:%M:%S%z") - except Exception as e: - logging.error("Could not read last run time for content type {}: {}.".format(content_type, e)) - del self._last_run_times[content_type] - - @property - def _done_retrieving_content(self): - """ - Returns True if there are no more content blobs to be collected. Used to determine when to exit the script. - :return: Bool - """ - for content_type in self.blobs_to_collect: - if self.blobs_to_collect[content_type]: - return False - return True - - @property - def _done_collecting_available_content(self): - """ - Once a call is made to retrieve content for a particular type, and there is no 'NextPageUri' in the response, - the type is removed from 'self.content_types' to signal that all available content has been retrieved for that - type. - """ - return not bool(self._remaining_content_types) - - def _auto_subscribe(self): - """ - Subscribe to all content types that are set to be retrieved. - """ - subscriber = AuditLogSubscriber.AuditLogSubscriber(tenant_id=self.tenant_id, client_key=self.client_key, - secret_key=self.secret_key) - status = subscriber.get_sub_status() - if status == '': - raise RuntimeError("Auto subscribe enabled but could not get subscription status") - unsubscribed_content_types = self._remaining_content_types.copy() - for s in status: - if isinstance(s, str): # For issue #18 - raise RuntimeError("Auto subscribe enabled but could not get subscription status") - if s['contentType'] in self._remaining_content_types and s['status'].lower() == 'enabled': - unsubscribed_content_types.remove(s['contentType']) - for content_type in unsubscribed_content_types: - logging.info("Auto subscribing to: {}".format(content_type)) - subscriber.set_sub_status(content_type=content_type, action='start') - - def _get_needed_runs(self, content_types): - """ - Return the start- and end times needed to retrieve content for each content type. If the timespan to retrieve - logs for exceeds 24 hours, we need to split it up into 24 hour runs (limit by Office API). 
- """ - runs = {} - end_time = datetime.datetime.now(datetime.timezone.utc) - for content_type in content_types: - runs[content_type] = [] - if self.config['collect', 'resume'] and content_type in self._last_run_times.keys(): - start_time = self._last_run_times[content_type] - logging.info("{} - resuming from: {}".format(content_type, start_time)) - else: - hours_to_collect = self.config['collect', 'hoursToCollect'] or 24 - start_time = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(hours=hours_to_collect) - - if end_time - start_time > datetime.timedelta(hours=168): - logging.warning("Hours to collect cannot be more than 168 due to Office API limits, defaulting to 168") - end_time = start_time + datetime.timedelta(hours=168) - while True: - if end_time - start_time > datetime.timedelta(hours=24): - split_start_time = start_time - split_end_time = start_time + datetime.timedelta(hours=24) - formatted_start_time = str(split_start_time).replace(' ', 'T').rsplit('.', maxsplit=1)[0] - formatted_end_time = str(split_end_time).replace(' ', 'T').rsplit('.', maxsplit=1)[0] - runs[content_type].append((formatted_start_time, formatted_end_time)) - start_time = split_end_time - self._remaining_content_types.append(content_type) - else: - formatted_start_time = str(start_time).replace(' ', 'T').rsplit('.', maxsplit=1)[0] - formatted_end_time = str(end_time).replace(' ', 'T').rsplit('.', maxsplit=1)[0] - runs[content_type].append((formatted_start_time, formatted_end_time)) - break - self._last_run_times[content_type] = end_time.strftime("%Y-%m-%dT%H:%M:%SZ") - return runs - - def _start_interfaces(self): - - for interface in self._all_enabled_interfaces: - interface.start() - - def _stop_interfaces(self, force): - - for interface in self._all_enabled_interfaces: - interface.stop(gracefully=not force) - - def _handle_retrieved_content(self, content_id, content_expiration, content_type, results): - """ - Check known logs, filter results and output what remains. - :param content_id: ID of content blob from API (str) - :param content_expiration: date string of expiration of content blob from API (str) - :param content_type: Type of API being retrieved for, e.g. 'Audit.Exchange' (str) - :param results: list of JSON - """ - if self.config['collect', 'skipKnownLogs']: - self._known_content[content_id] = content_expiration - for log in results.copy(): - if self.config['collect', 'skipKnownLogs']: - if log['Id'] in self.known_logs: - results.remove(log) - continue - self.known_logs[log['Id']] = log['CreationTime'] - if self.config['collect', 'filter'] and not self._check_filters(log=log, content_type=content_type): - results.remove(log) - self.logs_retrieved += len(results) - self._output_results(results=results, content_type=content_type) - - def _output_results(self, results, content_type): - """ - :param content_type: Type of API being retrieved for, e.g. 'Audit.Exchange' (str) - :param results: list of JSON - """ - for interface in self._all_enabled_interfaces: - interface.send_messages(*results, content_type=content_type) - - def _check_filters(self, log, content_type): - """ - :param log: JSON - :param content_type: Type of API being retrieved for, e.g. 
'Audit.Exchange' (str) - :return: True if log matches filter, False if not (Bool) - """ - filters = self.config['collect', 'filter'] - if content_type in filters and filters[content_type]: - for log_filter_key, log_filter_value in filters[content_type].items(): - if log_filter_key not in log or log[log_filter_key].lower() != log_filter_value.lower(): - return False - return True - - def _add_known_log(self): - """ - Add a content ID to the known content file to avoid saving messages more than once. - :return: - """ - with open(os.path.join(self.working_dir, 'known_logs'), 'w') as ofile: - for log_id, creation_time in self.known_logs.items(): - ofile.write('{},{}\n'.format(log_id, creation_time)) - - def _add_known_content(self): - """ - Add a content ID to the known content file to avoid saving messages more than once. - :return: - """ - with open(os.path.join(self.working_dir, 'known_content'), 'w') as ofile: - for content_id, content_expiration in self.known_content.items(): - ofile.write('{0},{1}\n'.format(content_id, content_expiration)) - - def _clean_known_logs(self): - """ - Remove any known content ID's that have expired. Can't download a duplicate if it is not available for - download. - """ - known_logs = self.known_logs - if os.path.exists(os.path.join(self.working_dir, 'known_logs')): - os.remove(os.path.join(self.working_dir, 'known_logs')) - for log_id, creation_time in known_logs.copy().items(): - try: - date = datetime.datetime.strptime(creation_time.strip()+'Z', "%Y-%m-%dT%H:%M:%S%z") - expire_date = date + datetime.timedelta(days=7) - if not datetime.datetime.now(datetime.timezone.utc) < expire_date: - del self.known_logs[log_id] - except Exception as e: - logging.debug("Could not parse known logs: {}".format(e)) - del self.known_logs[log_id] - if not known_logs: - return - with open(os.path.join(self.working_dir, 'known_logs'), mode='w') as ofile: - for log_id, creation_time in known_logs.items(): - ofile.write("{},{}\n".format(log_id, creation_time.strip())) - - def _clean_known_content(self): - """ - Remove any known content ID's that have expired. Can't download a duplicate if it is not available for - download. - """ - known_content = self.known_content - if os.path.exists(os.path.join(self.working_dir, 'known_content')): - os.remove(os.path.join(self.working_dir, 'known_content')) - for content_id, expire_date in known_content.copy().items(): - try: - date = datetime.datetime.strptime(expire_date, "%Y-%m-%dT%H:%M:%S.%f%z") - if not datetime.datetime.now(datetime.timezone.utc) < date: - del known_content[content_id] - except Exception as e: - logging.debug("Could not parse known content: {}".format(e)) - del known_content[content_id] - if not known_content: - return - with open(os.path.join(self.working_dir, 'known_content'), 'w') as ofile: - for content_id, expire_date in known_content.items(): - ofile.write("{},{}\n".format(content_id, expire_date)) - - @property - def known_logs(self): - """ - Parse and return known content file. - :return: {content_id: content_expiration_date} dict - """ - if not self._known_logs and os.path.exists(os.path.join(self.working_dir, 'known_logs')): - with open(os.path.join(self.working_dir, 'known_logs'), 'r') as ofile: - for line in ofile.readlines(): - if not line.strip(): - continue - try: - self._known_logs[line.split(',')[0].strip()] = line.split(',')[1] - except: - continue - return self._known_logs - - @property - def known_content(self): - """ - Parse and return known content file. 
- :return: {content_id: content_expiration_date} dict - """ - if not self._known_content and os.path.exists(os.path.join(self.working_dir, 'known_content')): - with open(os.path.join(self.working_dir, 'known_content'), 'r') as ofile: - for line in ofile.readlines(): - if not line.strip(): - continue - try: - self._known_content[line.split(',')[0].strip()] = line.split(',')[1].strip() - except: - continue - return self._known_content - - -class Config(object): - - def parse_schedule(self): - """ - :return: tuple of ints (days/hours/minutes) - """ - schedule = self._find_setting('collect', 'schedule') - if not schedule: - return - try: - schedule = [int(x) for x in schedule.split(' ')] - assert len(schedule) == 3 - except Exception as e: - raise RuntimeError( - "Could not interpret schedule. Make sure it's in the format '0 0 0' (days/hours/minutes) {}" - .format(e)) - else: - return schedule - - - - -if __name__ == "__main__": - - description = \ - """ - Retrieve audit log contents from Office 365 API and save to file or other output. - Example: Retrieve all available content and send it to an output (using mock ID's and keys): - "AuditLogCollector.py 123 456 789 --general --exchange --azure_ad --sharepoint --dlp -g -gA 10.10.10.1 -gP 5000 - """ - parser = argparse.ArgumentParser(description=description) - parser.add_argument('tenant_id', type=str, help='Tenant ID of Azure AD', action='store') - parser.add_argument('client_key', type=str, help='Client key of Azure application', action='store') - parser.add_argument('secret_key', type=str, help='Secret key generated by Azure application', action='store') - parser.add_argument('--config', metavar='config', type=str, help='Path to YAML config file', - action='store', dest='config', required=True) - parser.add_argument('--table-string', metavar='table_string', type=str, - help='Connection string for Azure Table output interface', action='store', dest='table_string') - parser.add_argument('--blob-string', metavar='blob_string', type=str, - help='Connection string for Azure Blob output interface', action='store', dest='blob_string') - parser.add_argument('--sql-string', metavar='sql_string', type=str, - help='Connection string for SQL output interface', action='store', dest='sql_string') - parser.add_argument('--interactive-subscriber', action='store_true', - help='Manually (un)subscribe to audit log feeds', dest='interactive_subscriber') - parser.add_argument('-p', metavar='publisher_id', type=str, dest='publisher_id', - help='Publisher GUID to avoid API throttling. Defaults to Tenant ID', action='store') - args = parser.parse_args() - argsdict = vars(args) - - if argsdict['interactive_subscriber']: - subscriber = AuditLogSubscriber.AuditLogSubscriber( - tenant_id=argsdict['tenant_id'], secret_key=argsdict['secret_key'], client_key=argsdict['client_key']) - subscriber.interactive() - quit(0) - - collector = AuditLogCollector( - config_path=argsdict['config'], - tenant_id=argsdict['tenant_id'], secret_key=argsdict['secret_key'], client_key=argsdict['client_key'], - publisher_id=argsdict['publisher_id'], sql_connection_string=argsdict['sql_string'], - table_connection_string=argsdict['table_string'], blob_connection_string=argsdict['blob_string']) - - signal.signal(signal.SIGINT, collector.force_stop) - collector.run() - - diff --git a/Source/AuditLogSubscriber.py b/Source/AuditLogSubscriber.py deleted file mode 100644 index bf0aa2e..0000000 --- a/Source/AuditLogSubscriber.py +++ /dev/null @@ -1,128 +0,0 @@ -""" -Taken from Microsoft sample script. 
-""" -# Standard libs -import sys -import logging -from collections import OrderedDict -# Internal libs -import ApiConnection - - -class AuditLogSubscriber(ApiConnection.ApiConnection): - - def get_info(self, question): - """ - Args: - question (str): question to ask user for input - - Returns: - string of user input - """ - while True: - value = input(question) - if value == '': - continue - else: - return value - - def get_sub_status(self): - - status = self.make_api_request(url='subscriptions/list', append_url=True) - return status.json() - - def set_sub_status(self, ctype_stat=None, content_type=None, action=None): - """ - Args: - ctype_stat (tuple): content type, status (enabled | disabled) - Returns: - dict - """ - content_type = content_type or ctype_stat[0] - if not action: - if ctype_stat[1] == 'enabled': - action = 'stop' - elif ctype_stat[1] == 'disabled': - action = 'start' - else: - return - status = self.make_api_request(url='subscriptions/{0}?contentType={1}'.format(action, content_type), - append_url=True, get=False) - logging.debug("Set sub status response: {}".format(status)) - try: - logging.debug("Set sub status json: {}".format(status.json())) - except Exception as e: - pass - if 200 <= status.status_code <= 299: - logging.info('Successfully set sub status: {} > {}'.format(content_type, action)) - else: - raise RuntimeError("Unable to set sub status: {}. Error: {}".format(content_type, status.text)) - status.close() - - def interactive(self): - - print('=' * 60) - print('This script will enable or disable Office 365 subscriptions.') - print('=' * 60) - print('Please enter the required data.\n') - if not self.tenant_id: - print(('The Tenant ID is listed under Azure Active Directory | ' - 'Properties and labeled "Directory ID".\nExample: ' - 'cb6997bf-4029-455f-9f7a-e76fee8881da\n')) - self.tenant_id = self.get_info('Enter Tenant ID: ') - if not self.client_key: - print(('\nThe Client Key is available after app registration and labeled "Application ID"' - 'App Registrations | | Application ID' - '\nExample: ' - '553dd2ba-251b-47d5-893d-2f7ab26adf19\n')) - self.client_key = self.get_info('Enter Client Key: ') - if not self.secret_key: - print(('\nThe Secret Key is accessible only one time after the App has been registered:' - '\nExample: ' - 'D8perHbL9gAqx4vx5YbuffCDsvz2Pbdswey72FYRDNk=\n')) - self.secret_key = self.get_info("Enter Secret Key: ") - - c = OrderedDict() - while True: - c['Audit.AzureActiveDirectory'] = 'disabled' - c['Audit.Exchange'] = 'disabled' - c['Audit.General'] = 'disabled' - c['Audit.SharePoint'] = 'disabled' - c['DLP.All'] ='disabled' - - status = self.get_sub_status() - if status != '': - try: - for s in status: - c[s['contentType']] = s['status'] - except (KeyError, TypeError): - print('Error: ', status['error']['message']) - sys.exit(1) - - print('\nEnter 1-5 to enable/disable subscriptions or 0 to exit') - for idx, (c_type, status) in enumerate(c.items(), 1): - print('{}. 
{}: {}'.format(idx, c_type, status)) - - try: - choice = int(self.get_info('Enter 0-5: ')) - except ValueError: - continue - menu = list(c.items()) - if 1 <= choice <= 5: - self.set_sub_status(menu[choice - 1]) - continue - elif choice == 6: - continue - elif choice == 0: - break - else: - continue - - -if __name__ == "__main__": - try: - subscriber = AuditLogSubscriber() - subscriber.interactive() - except KeyboardInterrupt: - logging.warning("Control-C Pressed, stopping...") - sys.exit() diff --git a/Source/Interfaces/AzureBlobInterface.py b/Source/Interfaces/AzureBlobInterface.py deleted file mode 100644 index 70adc68..0000000 --- a/Source/Interfaces/AzureBlobInterface.py +++ /dev/null @@ -1,91 +0,0 @@ -from . import FileInterface -from azure.storage.blob import BlobServiceClient -import os - - -class AzureBlobInterface(FileInterface.FileInterface): - - interface_name = 'azureBlob' - - def __init__(self, blob_connection_string=None, **kwargs): - """ - Interface to send logs to CSV file(s). Not every audit log has every possible column, so columns in the CSV - file might change over time. Therefore, the CSV file is recreated each time the cache_size is hit to insure - integrity, taking the performance hit. - """ - super().__init__(**kwargs) - self.connection_string = blob_connection_string - self._blob_service = None - self._container_client = None - - @property - def enabled(self): - - return self.collector.config['output', self.interface_name, 'enabled'] - - @property - def separate_by_content(self): - """ - Needed because AzureBlobInterface inherits from this interface and has its own config. - :return: bool - """ - return self.collector.config['output', 'azureBlob', 'separateByContentType'] - - @property - def separator(self): - """ - Needed because AzureBlobInterface inherits from this interface and has its own config. - :return: str - """ - return self.collector.config['output', 'azureBlob', 'separator'] - - @property - def path(self): - """ - Needed because AzureBlobInterface inherits from this interface and has its own config. - :return: str - """ - return self.collector.config['output', 'azureBlob', 'tempPath'] - - @property - def cache_size(self): - """ - Needed because AzureBlobInterface inherits from this interface and has its own config. 
- :return: int - """ - return self.collector.config['output', 'azureBlob', 'cacheSize'] - - @property - def blob_service(self): - - if not self._blob_service: - self._blob_service = BlobServiceClient.from_connection_string(conn_str=self.connection_string) - return self._blob_service - - @property - def container_client(self): - - if not self._container_client: - container_name = self.collector.config['output', 'azureBlob', 'containerName'] or 'audit-logs' - if container_name not in [container['name'] for container in self.blob_service.list_containers()]: - self._container_client = self._blob_service.create_container(name=container_name) - else: - self._container_client = self._blob_service.get_container_client(container=container_name) - return self._container_client - - def write_blob(self, blob_name, file_path): - - blob_client = self.container_client.get_blob_client(blob=blob_name) - with open(file_path, 'rb') as ofile: - blob_client.upload_blob(ofile, overwrite=True) - - def exit_callback(self): - - super().exit_callback() - if not self.separate_by_content: - self.write_blob(blob_name=self.collector.config['output', 'azureBlob', 'blobName'] or 'AuditLog', - file_path=self.path) - for content_type in self.paths.keys(): - temp_file_path = self.paths[content_type] - blob_name = os.path.split(self._path_for(content_type=content_type))[-1] - self.write_blob(blob_name=blob_name, file_path=temp_file_path) diff --git a/Source/Interfaces/AzureOMSInterface.py b/Source/Interfaces/AzureOMSInterface.py deleted file mode 100644 index 37290eb..0000000 --- a/Source/Interfaces/AzureOMSInterface.py +++ /dev/null @@ -1,131 +0,0 @@ -from . import _Interface -import requests -import requests.adapters -import hashlib -import hmac -import base64 -import logging -import threading -import collections -import time -import json -import datetime - - -class AzureOMSInterface(_Interface.Interface): - - interface_name = 'azureLogAnalytics' - - def __init__(self, **kwargs): - """ - Interface to send logs to an Azure Log Analytics Workspace. - :param workspace_id: Found under "Agent Configuration" blade (str) - :param shared_key: Found under "Agent Configuration" blade (str) - """ - super().__init__(**kwargs) - self.threads = collections.deque() - self.session = requests.Session() - max_threads = self.collector.config['output', 'azureLogAnalytics', 'maxThreads'] or 50 - adapter = requests.adapters.HTTPAdapter(pool_connections=max_threads, pool_maxsize=max_threads) - self.session.mount('https://', adapter) - - @property - def enabled(self): - - return self.collector.config['output', self.interface_name, 'enabled'] - - def monitor_queue(self): - """ - Overloaded for multithreading. - """ - while 1: - self.threads = [running_thread for running_thread in self.threads if running_thread.is_alive()] - if self.queue and len(self.threads) < (self.collector.config['output', 'azureLogAnalytics', 'maxThreads'] - or 50): - msg, content_type = self.queue.popleft() - if msg == 'stop monitor thread': - [running_thread.join() for running_thread in self.threads] - return - else: - new_thread = threading.Thread(target=self._send_message, - kwargs={"msg": msg, "content_type": content_type}, daemon=True) - new_thread.start() - self.threads.append(new_thread) - - def _send_message(self, msg, content_type, retries=3): - """ - Send a single message to a graylog input; the socket must be closed after each individual message, - otherwise Graylog will interpret it as a single large message. 
- :param msg: dict - """ - time_generated = msg['CreationTime'] - msg_string = json.dumps(msg) - if not msg_string: - return - while True: - try: - self._post_data(body=msg_string, log_type=content_type.replace('.', ''), time_generated=time_generated) - except Exception as e: - logging.error("Error sending to OMS: {}. Retries left: {}".format(e, retries)) - if retries: - retries -= 1 - time.sleep(10) - continue - else: - self.unsuccessfully_sent += 1 - break - else: - self.successfully_sent += 1 - break - - def _build_signature(self, date, content_length, method, content_type, resource): - """ - Returns authorization header which will be used when sending data into Azure Log Analytics. - """ - - x_headers = 'x-ms-date:' + date - string_to_hash = method + "\n" + str(content_length) + "\n" + content_type + "\n" + x_headers + "\n" + resource - bytes_to_hash = bytes(string_to_hash, 'UTF-8') - decoded_key = base64.b64decode(self.collector.config['output', 'azureLogAnalytics', 'sharedKey']) - encoded_hash = base64.b64encode(hmac.new(decoded_key, bytes_to_hash, digestmod=hashlib.sha256).digest()).decode( - 'utf-8') - authorization = "SharedKey {}:{}".format(self.collector.config['output', 'azureLogAnalytics', 'workspaceId'], - encoded_hash) - return authorization - - def _post_data(self, body, log_type, time_generated): - """ - Sends payload to Azure Log Analytics Workspace. - :param body: payload to send to Azure Log Analytics (json.dumps str) - :param log_type: Azure Log Analytics table name (str) - :param time_generated: date time of the original audit log msg (ISO 8601 str) - """ - method = 'POST' - content_type = 'application/json' - resource = '/api/logs' - rfc1123date = datetime.datetime.utcnow().strftime('%a, %d %b %Y %H:%M:%S GMT') - content_length = len(body) - signature = self._build_signature(rfc1123date, content_length, method, content_type, resource) - - uri = 'https://' + self.collector.config['output', 'azureLogAnalytics', 'workspaceId'] + \ - '.ods.opinsights.azure.com' + resource + '?api-version=2016-04-01' - - headers = { - 'content-type': content_type, - 'Authorization': signature, - 'Log-Type': log_type, - 'x-ms-date': rfc1123date, - 'time-generated-field': time_generated - } - response = self.session.post(uri, data=body, headers=headers) - status_code = response.status_code - try: - json_output = response.json() - except: - json_output = '' - - response.close() - if 200 <= status_code <= 299: - logging.debug('Accepted payload:' + body) - else: - raise RuntimeError("Unable to send to OMS with {}: {} ".format(status_code, json_output)) \ No newline at end of file diff --git a/Source/Interfaces/AzureTableInterface.py b/Source/Interfaces/AzureTableInterface.py deleted file mode 100644 index 0837357..0000000 --- a/Source/Interfaces/AzureTableInterface.py +++ /dev/null @@ -1,94 +0,0 @@ -import collections -import logging -import threading -from . import _Interface -import azure.data.tables - -azure.data.tables._error.DecodeError - - -class AzureTableInterface(_Interface.Interface): - - interface_name = 'azureTable' - - def __init__(self, table_connection_string=None, **kwargs): - """ - Interface to send logs to CSV file(s). Not every audit log has every possible column, so columns in the CSV - file might change over time. Therefore, the CSV file is recreated each time the cache_size is hit to insure - integrity, taking the performance hit. 
- """ - super().__init__(**kwargs) - self.connection_string = table_connection_string - self._table_service = None - self._table_client = None - self._threads = collections.deque() - - @property - def enabled(self): - - return self.collector.config['output', self.interface_name, 'enabled'] - - @property - def table_service(self): - - if not self._table_service: - if not self.connection_string: - raise RuntimeError("Azure table output needs a connection string. Use --table-string to pass one.") - self._table_service = ( - azure.data.tables.TableServiceClient.from_connection_string(conn_str=self.connection_string)) - return self._table_service - - @property - def table_client(self): - - if not self._table_client: - self._table_client = self.table_service.create_table_if_not_exists( - table_name=self.collector.config['output', 'azureTable', 'tableName'] or 'AuditLogs') - return self._table_client - - @staticmethod - def _validate_fields(msg): - - for k, v in msg.copy().items(): - if (isinstance(v, int) and v > 2147483647) or isinstance(v, list) or isinstance(v, dict): - msg[k] = str(v) - return msg - - def monitor_queue(self): - """ - Overloaded for multithreading. - """ - while 1: - self._threads = [running_thread for running_thread in self._threads if running_thread.is_alive()] - if self.queue and len(self._threads) < (self.collector.config['output', 'azureTable', 'maxThreads'] or 10): - msg, content_type = self.queue.popleft() - if msg == 'stop monitor thread': - [running_thread.join() for running_thread in self._threads] - return - else: - new_thread = threading.Thread(target=self._send_message, - kwargs={"msg": msg, "content_type": content_type}, daemon=True) - new_thread.start() - self._threads.append(new_thread) - - def _send_message(self, msg, content_type, **kwargs): - try: - msg = self._validate_fields(msg=msg) - entity = { - 'PartitionKey': content_type, - 'RowKey': msg['Id'], - } - entity.update(msg) - self.table_client.create_entity(entity) - except azure.data.tables._error.ResourceExistsError: - self.successfully_sent += 1 - return - except Exception as e: - self.unsuccessfully_sent += 1 - logging.error("Error sending log to Azure Table. Log: {}. Error: {}.".format(msg, e)) - else: - self.successfully_sent += 1 - - def exit_callback(self): - - return [thread.join() for thread in self._threads] diff --git a/Source/Interfaces/FileInterface.py b/Source/Interfaces/FileInterface.py deleted file mode 100644 index b41a94d..0000000 --- a/Source/Interfaces/FileInterface.py +++ /dev/null @@ -1,109 +0,0 @@ -import logging -import os -from . import _Interface -import collections -import pandas - - -class FileInterface(_Interface.Interface): - - interface_name = 'file' - - def __init__(self, **kwargs): - """ - Interface to send logs to CSV file(s). Not every audit log has every possible column, so columns in the CSV - file might change over time. Therefore, the CSV file is recreated each time the cache_size is hit to insure - integrity, taking the performance hit. - """ - super().__init__(**kwargs) - self.paths = {} - self.results_cache = collections.defaultdict(collections.deque) - - @property - def enabled(self): - - return self.collector.config['output', self.interface_name, 'enabled'] - - @property - def total_cache_length(self): - - return sum([len(self.results_cache[k]) for k in self.results_cache.keys()]) - - @property - def separate_by_content(self): - """ - Needed because AzureBlobInterface inherits from this interface and has its own config. 
- :return: bool - """ - return self.collector.config['output', 'file', 'separateByContentType'] - - @property - def separator(self): - """ - Needed because AzureBlobInterface inherits from this interface and has its own config. - :return: str - """ - return self.collector.config['output', 'file', 'separator'] - - @property - def path(self): - """ - Needed because AzureBlobInterface inherits from this interface and has its own config. - :return: str - """ - return self.collector.config['output', 'file', 'path'] - - @property - def cache_size(self): - """ - Needed because AzureBlobInterface inherits from this interface and has its own config. - :return: int - """ - return self.collector.config['output', 'file', 'cacheSize'] - - def _path_for(self, content_type): - - if content_type not in self.paths: - if not self.separate_by_content: - self.paths[content_type] = self.path or 'output.csv' - else: - path, file_name = os.path.split(self.path or 'output.csv') - file_name = file_name.strip('.csv') - file_name = "{}_{}.csv".format(file_name, content_type.replace('.', '')) - self.paths[content_type] = os.path.join(path, file_name) - return self.paths[content_type] - - def _send_message(self, msg, content_type, **kwargs): - - self.results_cache[content_type].append(msg) - if self.total_cache_length >= (self.cache_size or 500000): - self._process_caches() - - def exit_callback(self): - - self._process_caches() - - def _process_caches(self): - - for content_type in self.results_cache.keys(): - self._process_cache(content_type=content_type) - - def _process_cache(self, content_type): - - amount = len(self.results_cache[content_type]) - try: - df = pandas.DataFrame(self.results_cache[content_type]) - self.results_cache[content_type].clear() - if os.path.exists(self._path_for(content_type=content_type)): - existing_df = pandas.read_csv( - self._path_for(content_type=content_type), - sep=self.separator or ';') - df = pandas.concat([existing_df, df]) - logging.info("Writing {} logs of type {} to {}".format(amount, content_type, self._path_for(content_type))) - df.to_csv(self._path_for(content_type=content_type), index=False, sep=self.separator or ';', mode='w', - header=not os.path.exists(self.paths[content_type])) - except Exception as e: - self.unsuccessfully_sent += amount - raise e - else: - self.successfully_sent += amount diff --git a/Source/Interfaces/FluentdInterface.py b/Source/Interfaces/FluentdInterface.py deleted file mode 100644 index f82f5cb..0000000 --- a/Source/Interfaces/FluentdInterface.py +++ /dev/null @@ -1,50 +0,0 @@ -from . 
import _Interface -import logging - - -class FluentdInterface(_Interface.Interface): - - interface_name = 'fluentd' - - def __init__(self, **kwargs): - - super().__init__(**kwargs) - self._logger = None - - @property - def enabled(self): - - return self.collector.config['output', self.interface_name, 'enabled'] - - @property - def address(self): - - return self.collector.config['output', self.interface_name, 'address'] - - @property - def port(self): - - return self.collector.config['output', self.interface_name, 'port'] - - @property - def tenant_name(self): - - return self.collector.config['output', self.interface_name, 'tenantName'] - - @property - def logger(self): - - if not self._logger: - from fluent import sender - self._logger = sender.FluentSender('o365', host=self.address, port=int(self.port)) - return self._logger - - def _send_message(self, msg, content_type, **kwargs): - - try: - msg['tenant'] = self.tenant_name - self.logger.emit(content_type, msg) - self.successfully_sent += 1 - except Exception as e: - logging.error("Error outputting to Fluentd: {}".format(e)) - self.unsuccessfully_sent += 1 diff --git a/Source/Interfaces/GraylogInterface.py b/Source/Interfaces/GraylogInterface.py deleted file mode 100644 index 8afe73c..0000000 --- a/Source/Interfaces/GraylogInterface.py +++ /dev/null @@ -1,71 +0,0 @@ -from . import _Interface -import datetime -import logging -import socket -import json -import time - - -class GraylogInterface(_Interface.Interface): - - interface_name = 'graylog' - - @property - def enabled(self): - - return self.collector.config['output', self.interface_name, 'enabled'] - - def _send_message(self, msg, retries=3, **kwargs): - """ - Send a single message to a graylog input; the socket must be closed after each individual message, - otherwise Graylog will interpret it as a single large message. - :param msg: dict - """ - msg = self._add_timestamp_field(msg) - msg_string = json.dumps(msg) - if not msg_string: - return - while True: - try: - sock = self._connect_to_graylog_input() - except OSError as e: # For issue: OSError: [Errno 99] Cannot assign requested address #6 - if retries: - logging.error("Error connecting to graylog: {}. Retrying {} more times".format(e, retries)) - retries -= 1 - time.sleep(30) - else: - logging.error("Error connecting to graylog: {}. Giving up for this message: {}".format( - e, msg_string)) - self.unsuccessfully_sent += 1 - return - else: - break - try: - sock.sendall(msg_string.encode()) - except Exception as e: - self.unsuccessfully_sent += 1 - logging.error("Error sending message to graylog: {}.".format(e)) - sock.close() - self.successfully_sent += 1 - - @staticmethod - def _add_timestamp_field(msg): - """ - Microsoft uses the CreationTime field for the datetime of the creation of the log. Graylog uses Timestamp for - this by default. Add a Timestamp log to a msg with a Graylog compatible datetime for convencience. - :param msg: JSON dict - :return: JSON dict - """ - creation_time = datetime.datetime.strptime(msg['CreationTime'], "%Y-%m-%dT%H:%M:%S") - timestamp = creation_time.strftime("%Y-%m-%d %H:%M:%S.%f")[:-3] - msg['timestamp'] = timestamp - return msg - - def _connect_to_graylog_input(self): - """ - Return a socket connected to the Graylog input. 
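The deleted FluentdInterface above boils down to one emit call per log. A sketch, assuming the fluent-logger package; the host, port and tenant name are placeholders for the corresponding config values (24224 is Fluentd's default forward port).

from fluent import sender

logger = sender.FluentSender("o365", host="localhost", port=24224)
record = {"CreationTime": "2024-01-01T12:00:00", "Operation": "FileAccessed", "tenant": "example-tenant"}
# The content type becomes the label, so events arrive tagged as o365.Audit.SharePoint.
if not logger.emit("Audit.SharePoint", record):
    print(logger.last_error)
    logger.clear_last_error()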
- """ - s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - s.connect((self.collector.config['output', self.interface_name, 'address'], - int(self.collector.config['output', self.interface_name, 'port']))) - return s diff --git a/Source/Interfaces/PRTGInterface.py b/Source/Interfaces/PRTGInterface.py deleted file mode 100644 index cd704df..0000000 --- a/Source/Interfaces/PRTGInterface.py +++ /dev/null @@ -1,54 +0,0 @@ -from . import _Interface -import collections - - -class PRTGInterface(_Interface.Interface): - - interface_name = 'prtg' - - def __init__(self, **kwargs): - """ - Interface to send logs to an Azure Log Analytics Workspace. - """ - super().__init__(**kwargs) - self.results = collections.defaultdict(collections.deque) - - @property - def enabled(self): - - return self.collector.config['output', self.interface_name, 'enabled'] - - def _send_message(self, msg, content_type, **kwargs): - - for channel in self.collector.config['output', self.interface_name, 'channels']: - if content_type not in channel['filters']: - continue - self._filter_result(msg=msg, content_type=content_type, channel=channel) - - def _filter_result(self, msg, content_type, channel): - - for filter_key, filter_value in channel['filters'][content_type].items(): - if filter_key not in msg or filter_value.lower() != msg[filter_key].lower(): - return - self.results[channel['name']].append(msg) - - def output(self): - from prtg.sensor.result import CustomSensorResult - try: - csr = CustomSensorResult() - for channel in self.collector.config['output', self.interface_name, 'channels']: - if channel['name'] not in self.results: - self.results[channel['name']] = collections.deque() - for channel_name, messages in self.results.items(): - csr.add_channel( - name=channel_name, value=len(messages), unit='Count') - print(csr.json_result) - except Exception as e: - csr = CustomSensorResult(text="Python Script execution error") - csr.error = "Python Script execution error: %s" % str(e) - print(csr.json_result) - - def exit_callback(self): - - super().exit_callback() - self.output() diff --git a/Source/Interfaces/SqlInterface.py b/Source/Interfaces/SqlInterface.py deleted file mode 100644 index 9ae7705..0000000 --- a/Source/Interfaces/SqlInterface.py +++ /dev/null @@ -1,229 +0,0 @@ -from . import _Interface -from sqlalchemy import create_engine, inspect -import pyodbc -import time -import urllib -import logging -import threading -import collections -import pandas - - -class SqlInterface(_Interface.Interface): - - interface_name = 'sql' - - def __init__(self, sql_connection_string, **kwargs): - """ - Interface to send logs to an SQL database. Caches logs in memory until the cache size is hit, then writes them - to database. When the cache is too small too many SQL writes are made taking ages to complete. Too - large and the collector will eat too much memory. - """ - super().__init__(**kwargs) - self._connection_string = sql_connection_string - self.results_cache = collections.defaultdict(collections.deque) - self._existing_columns = {} - self._threads = collections.deque() - self._engine = None - - @property - def enabled(self): - - return self.collector.config['output', self.interface_name, 'enabled'] - - @property - def engine(self): - """ - DB Engine for use in main thread. A separate one is creation for each sub thread. 
- :return: sqlalchemy.Engine - """ - if not self._engine: - self._engine = create_engine(self.connection_string) - return self._engine - - @staticmethod - def _table_name_for(content_type): - """ - Create a table name for a content type (remove periods). - :param content_type: str - :return: str - """ - return content_type.replace('.', '') - - def _existing_columns_for(self, content_type, engine): - """ - Cache columns currently existing for a table. Used to check if new incoming logs have columns that currently - don't exist in the database. - :param content_type: str - :return: list of str - """ - if content_type not in self._existing_columns.keys(): - self._existing_columns[content_type] = \ - pandas.read_sql_query(f"SELECT TOP (1) * FROM {self._table_name_for(content_type)};", - con=engine).columns.tolist() - return self._existing_columns[content_type] - - @property - def total_cache_length(self): - - return sum([len(self.results_cache[k]) for k in self.results_cache.keys()]) - - @property - def connection_string(self): - - params = urllib.parse.quote_plus(self._connection_string) - return 'mssql+pyodbc:///?autocommit=true&odbc_connect={}'.format(params) - - @staticmethod - def _validate_column_names(df): - """ - Audit logs tend to have periods (.) in their column names. Take those out. If a log column has the same name - as an existing column in the database, but the capitalization doesn't match, rename the column to the existing - one. Otherwise SQL will throw an error for duplicate column names. - :param df: pandas.DataFrame. - :return: pandas.DataFrame. - """ - to_rename = {} - for column in df: - if '.' in column: - to_rename[column] = column.replace('.', '') - return df.rename(columns=to_rename) - - @staticmethod - def _validate_column_value(df): - """ - Flatten columns that a list as value. E.g. column "ColumnA: [1,2,3]" becomes: - "ColumnA_0: 1, ColumnA_1: 2, ColumnA_2: 3". - :param df: pandas.DataFrame. - :return: pandas.DataFrame - """ - for column in df.columns.tolist(): - for i, value in enumerate(df[column]): - if type(df[column][i]) in [list, dict]: - df[column][i] = str(df[column][i]) - return df - - def _validate_existing_columns(self, df, content_type, engine): - """ - Not all audit logs have all available columns. There columns in the database might change as logs come in. - Check whether all columns in a log already exist in the current table. - :return: Bool - """ - if inspect(engine).has_table(self._table_name_for(content_type=content_type)): - new_cols = df.columns.tolist() - missing_cols = set(new_cols) - set(self._existing_columns_for(content_type, engine=engine)) - return not missing_cols - return True - - @staticmethod - def _deduplicate_columns(df): - """ - Different logs sometimes have identical columns names but with different capitalization (for some reason); - merge these columns. 
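The connection_string property above wraps a raw ODBC string for SQLAlchemy's mssql+pyodbc dialect. A sketch with a placeholder DSN; the cached batches are then written through this engine (see _process_cache below).

import urllib.parse

from sqlalchemy import create_engine

odbc_string = ("DRIVER={ODBC Driver 17 for SQL Server};SERVER=myserver;"
               "DATABASE=AuditLogs;UID=collector;PWD=secret")
params = urllib.parse.quote_plus(odbc_string)
engine = create_engine("mssql+pyodbc:///?autocommit=true&odbc_connect={}".format(params))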
- :param df: - :return: - """ - to_check = df.columns.tolist() - leading_columns = [] - to_merge = collections.defaultdict(collections.deque) - for column in to_check: - for leading_column in leading_columns: - if column.lower() == leading_column.lower() and column != leading_column: - to_merge[leading_column].append(column) - break - else: - leading_columns.append(column) - for leading_column, columns_to_merge in to_merge.items(): - new_column = df[leading_column] - for column_to_merge in columns_to_merge: - new_column = new_column.combine_first(df[column_to_merge]) - del df[column_to_merge] - del df[leading_column] - df[leading_column] = new_column - return df - - def _remake_table(self, new_data, content_type, engine): - """ - If a new log is coming in that has columns that don't exist in the current table, replace it instead of - appending. - :param new_data: pandas.DataFrame - :param content_type: str - """ - table_name = self._table_name_for(content_type=content_type) - existing_table = pandas.read_sql_table(con=engine, table_name=table_name) - df = pandas.concat([new_data, existing_table]) - self._existing_columns[content_type] = df.columns.tolist() - logging.info("Recommitting {} records of type {} to table {}".format( - len(df), content_type, table_name)) - df = df.loc[:, ~df.columns.duplicated()] # Remove any duplicate columns - df = self._deduplicate_columns(df=df) - df.to_sql(name=table_name, con=engine, index=False, if_exists='replace', - chunksize=int((self.collector.config['output', self.interface_name, 'chunkSize'] or 2000) / len(df.columns)), - method='multi') - - def _send_message(self, msg, content_type, **kwargs): - """ - Write logs to cache. Process cache if cache size is hit. - :param msg: JSON - :param content_type: str - """ - self.results_cache[content_type].append(msg) - if self.total_cache_length >= (self.collector.config['output', self.interface_name, 'cacheSize'] or 500000): - self._wait_threads() - self._threads.clear() - self._process_caches() - - def _process_caches(self): - """ - Write all cached logs to database. - """ - for content_type in self.results_cache.copy().keys(): - if not self.results_cache[content_type]: - continue - thread = threading.Thread(target=self._process_cache, kwargs={'content_type': content_type}, daemon=True) - thread.start() - self._threads.append(thread) - - def _wait_threads(self, timeout=600): - - while True in [thread.is_alive() for thread in self._threads]: - if not timeout: - raise RuntimeError("Timeout while committing to database") - timeout -= 1 - time.sleep(1) - - def _process_cache(self, content_type): - """ - Write cached logs to database for a content type. 
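_deduplicate_columns above merges columns whose names differ only in capitalization. A small self-contained example of the combine_first approach it uses:

import pandas

df = pandas.DataFrame([
    {"UserId": "alice@example.com", "Operation": "FileAccessed"},
    {"userid": "bob@example.com", "Operation": "FileDeleted"},
])
# 'UserId' and 'userid' describe the same field; fill gaps in the leading column
# from the duplicate, then drop both and re-add the merged result.
merged = df["UserId"].combine_first(df["userid"])
df = df.drop(columns=["UserId", "userid"])
df["UserId"] = merged
print(df)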
- :param content_type: str - """ - df = pandas.DataFrame(self.results_cache[content_type]) - df = self._validate_column_names(df=df) - df = self._validate_column_value(df=df) - - table_name = self._table_name_for(content_type=content_type) - engine = create_engine(self.connection_string) - with engine.connect(): - try: - if not self._validate_existing_columns(df=df, content_type=content_type, engine=engine): - self._remake_table(new_data=df, content_type=content_type, engine=engine) - else: - logging.info("Committing {} records of type {} to table {}".format( - len(df), content_type, table_name)) - df = df.loc[:, ~df.columns.duplicated()] # Remove any duplicate columns - df = self._deduplicate_columns(df=df) - df.to_sql( - name=table_name, con=engine, index=False, if_exists='append', - chunksize=int((self.collector.config['output', self.interface_name, 'chunkSize'] or 2000) / len(df.columns)), - method='multi') - except Exception as e: - self.unsuccessfully_sent += len(df) - raise e - else: - self.successfully_sent += len(df) - - def exit_callback(self): - - super().exit_callback() - self._process_caches() - self._wait_threads() diff --git a/Source/Interfaces/_Interface.py b/Source/Interfaces/_Interface.py deleted file mode 100644 index bb76a99..0000000 --- a/Source/Interfaces/_Interface.py +++ /dev/null @@ -1,83 +0,0 @@ -from collections import deque -import threading - - -class Interface(object): - - interface_name = '_interface' # Set in subclasses to indicate name used in config - - def __init__(self, collector, **kwargs): - - self.collector = collector - self.monitor_thread = None - self.queue = deque() - self.successfully_sent = 0 - self.unsuccessfully_sent = 0 - - @property - def enabled(self): - """ - Overload for each interface to point to the right setting in the config file. - :return: Bool - """ - return self.collector.config['output', self.interface_name, 'enabled'] - - def reset(self): - - self.successfully_sent = 0 - self.unsuccessfully_sent = 0 - self.queue.clear() - - def start(self): - """ - Start monitoring for messages to dispatch. - """ - self.monitor_thread = threading.Thread(target=self.monitor_queue, daemon=True) - self.monitor_thread.start() - - def stop(self, gracefully=True): - """ - Stop the interface gracefully or forcefully. - :param gracefully: wait for all messages to be dispatched (Bool) - """ - if gracefully: - self.queue.append(('stop monitor thread', '')) - else: - self.queue.appendleft(('stop monitor thread', '')) - if self.monitor_thread.is_alive(): - self.monitor_thread.join() - - def monitor_queue(self): - """ - Check the message queue and dispatch them when found. - """ - while 1: - if self.queue: - msg, content_type = self.queue.popleft() - if msg == 'stop monitor thread': - return self.exit_callback() - else: - self._send_message(msg=msg, content_type=content_type) - - def exit_callback(self): - """ - Called right before the interface is stopped. - """ - pass - - def send_messages(self, *messages, content_type): - """ - Send message(s) to this interface. They will be handled asynchronously. - :param messages: list of dict - :param content_type: str - """ - for message in messages: - self.queue.append((message, content_type)) - - def _send_message(self, msg, content_type, **kwargs): - """ - Overload and implement actual sending of the message to the interface. 
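The deleted _Interface.Interface base class above is essentially a single-producer/single-consumer queue with a sentinel message: the collector appends logs, a daemon monitor thread drains them until it sees 'stop monitor thread'. A condensed sketch of that pattern (illustrative names, not the original class):

import threading
from collections import deque

class MiniInterface:
    def __init__(self):
        self.queue = deque()
        self.monitor_thread = None

    def start(self):
        self.monitor_thread = threading.Thread(target=self._monitor, daemon=True)
        self.monitor_thread.start()

    def send_messages(self, *messages, content_type):
        for message in messages:
            self.queue.append((message, content_type))

    def stop(self):
        # Graceful stop: the sentinel goes to the back, so queued logs are flushed first.
        self.queue.append(("stop monitor thread", ""))
        self.monitor_thread.join()

    def _monitor(self):
        while True:
            if not self.queue:
                continue                      # busy-wait, as in the original
            msg, content_type = self.queue.popleft()
            if msg == "stop monitor thread":
                return
            print("dispatching", content_type, msg)

iface = MiniInterface()
iface.start()
iface.send_messages({"Id": "1"}, content_type="Audit.General")
iface.stop()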
- :param msg: dict - :param content_type: str - """ - pass diff --git a/Source/RustEngine/Cargo.lock b/Source/RustEngine/Cargo.lock deleted file mode 100644 index 405ee30..0000000 --- a/Source/RustEngine/Cargo.lock +++ /dev/null @@ -1,1329 +0,0 @@ -# This file is automatically @generated by Cargo. -# It is not intended for manual editing. -version = 3 - -[[package]] -name = "alc" -version = "0.1.0" -dependencies = [ - "chrono", - "futures", - "log", - "pyo3", - "reqwest", - "serde", - "serde_derive", - "serde_json", - "serde_yaml", - "simple_logger", - "tokio", - "tokio-stream", -] - -[[package]] -name = "atty" -version = "0.2.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" -dependencies = [ - "hermit-abi", - "libc", - "winapi", -] - -[[package]] -name = "autocfg" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" - -[[package]] -name = "base64" -version = "0.13.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "904dfeac50f3cdaba28fc6f57fdcddb75f49ed61346676a78c4ffe55877802fd" - -[[package]] -name = "bitflags" -version = "1.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" - -[[package]] -name = "bumpalo" -version = "3.9.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4a45a46ab1f2412e53d3a0ade76ffad2025804294569aae387231a0cd6e0899" - -[[package]] -name = "bytes" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4872d67bab6358e59559027aa3b9157c53d9358c51423c17554809a8858e0f8" - -[[package]] -name = "cc" -version = "1.0.73" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2fff2a6927b3bb87f9595d67196a70493f627687a71d87a0d692242c33f58c11" - -[[package]] -name = "cfg-if" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" - -[[package]] -name = "chrono" -version = "0.4.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "670ad68c9088c2a963aaa298cb369688cf3f9465ce5e2d4ca10e6e0098a1ce73" -dependencies = [ - "libc", - "num-integer", - "num-traits", - "time 0.1.44", - "winapi", -] - -[[package]] -name = "colored" -version = "2.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3616f750b84d8f0de8a58bda93e08e2a81ad3f523089b05f1dffecab48c6cbd" -dependencies = [ - "atty", - "lazy_static", - "winapi", -] - -[[package]] -name = "core-foundation" -version = "0.9.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "194a7a9e6de53fa55116934067c844d9d749312f75c6f6d0980e8c252f8c2146" -dependencies = [ - "core-foundation-sys", - "libc", -] - -[[package]] -name = "core-foundation-sys" -version = "0.8.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5827cebf4670468b8772dd191856768aedcb1b0278a04f989f7766351917b9dc" - -[[package]] -name = "encoding_rs" -version = "0.8.31" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9852635589dc9f9ea1b6fe9f05b50ef208c85c834a562f0c6abb1c475736ec2b" -dependencies = [ - "cfg-if", -] - -[[package]] -name = "fastrand" -version = "1.7.0" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3fcf0cee53519c866c09b5de1f6c56ff9d647101f81c1964fa632e148896cdf" -dependencies = [ - "instant", -] - -[[package]] -name = "fnv" -version = "1.0.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" - -[[package]] -name = "foreign-types" -version = "0.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1" -dependencies = [ - "foreign-types-shared", -] - -[[package]] -name = "foreign-types-shared" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b" - -[[package]] -name = "form_urlencoded" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5fc25a87fa4fd2094bffb06925852034d90a17f0d1e05197d4956d3555752191" -dependencies = [ - "matches", - "percent-encoding", -] - -[[package]] -name = "futures" -version = "0.3.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f73fe65f54d1e12b726f517d3e2135ca3125a437b6d998caf1962961f7172d9e" -dependencies = [ - "futures-channel", - "futures-core", - "futures-executor", - "futures-io", - "futures-sink", - "futures-task", - "futures-util", -] - -[[package]] -name = "futures-channel" -version = "0.3.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3083ce4b914124575708913bca19bfe887522d6e2e6d0952943f5eac4a74010" -dependencies = [ - "futures-core", - "futures-sink", -] - -[[package]] -name = "futures-core" -version = "0.3.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c09fd04b7e4073ac7156a9539b57a484a8ea920f79c7c675d05d289ab6110d3" - -[[package]] -name = "futures-executor" -version = "0.3.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9420b90cfa29e327d0429f19be13e7ddb68fa1cccb09d65e5706b8c7a749b8a6" -dependencies = [ - "futures-core", - "futures-task", - "futures-util", -] - -[[package]] -name = "futures-io" -version = "0.3.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc4045962a5a5e935ee2fdedaa4e08284547402885ab326734432bed5d12966b" - -[[package]] -name = "futures-macro" -version = "0.3.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33c1e13800337f4d4d7a316bf45a567dbcb6ffe087f16424852d97e97a91f512" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "futures-sink" -version = "0.3.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21163e139fa306126e6eedaf49ecdb4588f939600f0b1e770f4205ee4b7fa868" - -[[package]] -name = "futures-task" -version = "0.3.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57c66a976bf5909d801bbef33416c41372779507e7a6b3a5e25e4749c58f776a" - -[[package]] -name = "futures-util" -version = "0.3.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d8b7abd5d659d9b90c8cba917f6ec750a74e2dc23902ef9cd4cc8c8b22e6036a" -dependencies = [ - "futures-channel", - "futures-core", - "futures-io", - "futures-macro", - "futures-sink", - "futures-task", - "memchr", - "pin-project-lite", - "pin-utils", - "slab", -] - -[[package]] -name = "h2" -version = "0.3.13" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "37a82c6d637fc9515a4694bbf1cb2457b79d81ce52b3108bdeea58b07dd34a57" -dependencies = [ - "bytes", - "fnv", - "futures-core", - "futures-sink", - "futures-util", - "http", - "indexmap", - "slab", - "tokio", - "tokio-util", - "tracing", -] - -[[package]] -name = "hashbrown" -version = "0.11.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab5ef0d4909ef3724cc8cce6ccc8572c5c817592e9285f5464f8e86f8bd3726e" - -[[package]] -name = "hermit-abi" -version = "0.1.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" -dependencies = [ - "libc", -] - -[[package]] -name = "http" -version = "0.2.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "31f4c6746584866f0feabcc69893c5b51beef3831656a968ed7ae254cdc4fd03" -dependencies = [ - "bytes", - "fnv", - "itoa", -] - -[[package]] -name = "http-body" -version = "0.4.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ff4f84919677303da5f147645dbea6b1881f368d03ac84e1dc09031ebd7b2c6" -dependencies = [ - "bytes", - "http", - "pin-project-lite", -] - -[[package]] -name = "httparse" -version = "1.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6330e8a36bd8c859f3fa6d9382911fbb7147ec39807f63b923933a247240b9ba" - -[[package]] -name = "httpdate" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4a1e36c821dbe04574f602848a19f742f4fb3c98d40449f11bcad18d6b17421" - -[[package]] -name = "hyper" -version = "0.14.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b26ae0a80afebe130861d90abf98e3814a4f28a4c6ffeb5ab8ebb2be311e0ef2" -dependencies = [ - "bytes", - "futures-channel", - "futures-core", - "futures-util", - "h2", - "http", - "http-body", - "httparse", - "httpdate", - "itoa", - "pin-project-lite", - "socket2", - "tokio", - "tower-service", - "tracing", - "want", -] - -[[package]] -name = "hyper-tls" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d6183ddfa99b85da61a140bea0efc93fdf56ceaa041b37d553518030827f9905" -dependencies = [ - "bytes", - "hyper", - "native-tls", - "tokio", - "tokio-native-tls", -] - -[[package]] -name = "idna" -version = "0.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "418a0a6fab821475f634efe3ccc45c013f742efe03d853e8d3355d5cb850ecf8" -dependencies = [ - "matches", - "unicode-bidi", - "unicode-normalization", -] - -[[package]] -name = "indexmap" -version = "1.8.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0f647032dfaa1f8b6dc29bd3edb7bbef4861b8b8007ebb118d6db284fd59f6ee" -dependencies = [ - "autocfg", - "hashbrown", -] - -[[package]] -name = "indoc" -version = "1.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e7906a9fababaeacb774f72410e497a1d18de916322e33797bb2cd29baa23c9e" -dependencies = [ - "unindent", -] - -[[package]] -name = "instant" -version = "0.1.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c" -dependencies = [ - "cfg-if", -] - -[[package]] -name = "ipnet" -version = "2.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "879d54834c8c76457ef4293a689b2a8c59b076067ad77b15efafbb05f92a592b" - 
-[[package]] -name = "itoa" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1aab8fc367588b89dcee83ab0fd66b72b50b72fa1904d7095045ace2b0c81c35" - -[[package]] -name = "js-sys" -version = "0.3.57" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "671a26f820db17c2a2750743f1dd03bafd15b98c9f30c7c2628c024c05d73397" -dependencies = [ - "wasm-bindgen", -] - -[[package]] -name = "lazy_static" -version = "1.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" - -[[package]] -name = "libc" -version = "0.2.124" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21a41fed9d98f27ab1c6d161da622a4fa35e8a54a8adc24bbf3ddd0ef70b0e50" - -[[package]] -name = "linked-hash-map" -version = "0.5.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7fb9b38af92608140b86b693604b9ffcc5824240a484d1ecd4795bacb2fe88f3" - -[[package]] -name = "lock_api" -version = "0.4.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "327fa5b6a6940e4699ec49a9beae1ea4845c6bab9314e4f84ac68742139d8c53" -dependencies = [ - "autocfg", - "scopeguard", -] - -[[package]] -name = "log" -version = "0.4.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6389c490849ff5bc16be905ae24bc913a9c8892e19b2341dbc175e14c341c2b8" -dependencies = [ - "cfg-if", -] - -[[package]] -name = "matches" -version = "0.1.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a3e378b66a060d48947b590737b30a1be76706c8dd7b8ba0f2fe3989c68a853f" - -[[package]] -name = "memchr" -version = "2.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "308cc39be01b73d0d18f82a0e7b2a3df85245f84af96fdddc5d202d27e47b86a" - -[[package]] -name = "mime" -version = "0.3.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a60c7ce501c71e03a9c9c0d35b861413ae925bd979cc7a4e30d060069aaac8d" - -[[package]] -name = "mio" -version = "0.8.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "52da4364ffb0e4fe33a9841a98a3f3014fb964045ce4f7a45a398243c8d6b0c9" -dependencies = [ - "libc", - "log", - "miow", - "ntapi", - "wasi 0.11.0+wasi-snapshot-preview1", - "winapi", -] - -[[package]] -name = "miow" -version = "0.3.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9f1c5b025cda876f66ef43a113f91ebc9f4ccef34843000e0adf6ebbab84e21" -dependencies = [ - "winapi", -] - -[[package]] -name = "native-tls" -version = "0.2.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd7e2f3618557f980e0b17e8856252eee3c97fa12c54dff0ca290fb6266ca4a9" -dependencies = [ - "lazy_static", - "libc", - "log", - "openssl", - "openssl-probe", - "openssl-sys", - "schannel", - "security-framework", - "security-framework-sys", - "tempfile", -] - -[[package]] -name = "ntapi" -version = "0.3.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c28774a7fd2fbb4f0babd8237ce554b73af68021b5f695a3cebd6c59bac0980f" -dependencies = [ - "winapi", -] - -[[package]] -name = "num-integer" -version = "0.1.44" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2cc698a63b549a70bc047073d2949cce27cd1c7b0a4a862d08a8031bc2801db" -dependencies = [ - "autocfg", - "num-traits", -] - -[[package]] -name = "num-traits" -version = "0.2.14" 
-source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a64b1ec5cda2586e284722486d802acf1f7dbdc623e2bfc57e65ca1cd099290" -dependencies = [ - "autocfg", -] - -[[package]] -name = "num_cpus" -version = "1.13.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "19e64526ebdee182341572e50e9ad03965aa510cd94427a4549448f285e957a1" -dependencies = [ - "hermit-abi", - "libc", -] - -[[package]] -name = "num_threads" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aba1801fb138d8e85e11d0fc70baf4fe1cdfffda7c6cd34a854905df588e5ed0" -dependencies = [ - "libc", -] - -[[package]] -name = "once_cell" -version = "1.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87f3e037eac156d1775da914196f0f37741a274155e34a0b7e427c35d2a2ecb9" - -[[package]] -name = "openssl" -version = "0.10.38" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c7ae222234c30df141154f159066c5093ff73b63204dcda7121eb082fc56a95" -dependencies = [ - "bitflags", - "cfg-if", - "foreign-types", - "libc", - "once_cell", - "openssl-sys", -] - -[[package]] -name = "openssl-probe" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf" - -[[package]] -name = "openssl-sys" -version = "0.9.72" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e46109c383602735fa0a2e48dd2b7c892b048e1bf69e5c3b1d804b7d9c203cb" -dependencies = [ - "autocfg", - "cc", - "libc", - "pkg-config", - "vcpkg", -] - -[[package]] -name = "parking_lot" -version = "0.12.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87f5ec2493a61ac0506c0f4199f99070cbe83857b0337006a30f3e6719b8ef58" -dependencies = [ - "lock_api", - "parking_lot_core", -] - -[[package]] -name = "parking_lot_core" -version = "0.9.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "995f667a6c822200b0433ac218e05582f0e2efa1b922a3fd2fbaadc5f87bab37" -dependencies = [ - "cfg-if", - "libc", - "redox_syscall", - "smallvec", - "windows-sys", -] - -[[package]] -name = "percent-encoding" -version = "2.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d4fd5641d01c8f18a23da7b6fe29298ff4b55afcccdf78973b24cf3175fee32e" - -[[package]] -name = "pin-project-lite" -version = "0.2.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e280fbe77cc62c91527259e9442153f4688736748d24660126286329742b4c6c" - -[[package]] -name = "pin-utils" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" - -[[package]] -name = "pkg-config" -version = "0.3.25" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1df8c4ec4b0627e53bdf214615ad287367e482558cf84b109250b37464dc03ae" - -[[package]] -name = "proc-macro2" -version = "1.0.37" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec757218438d5fda206afc041538b2f6d889286160d649a86a24d37e1235afd1" -dependencies = [ - "unicode-xid", -] - -[[package]] -name = "pyo3" -version = "0.16.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd86513975ed69bf3fb5d4a286cdcda66dbc56f84bdf4832b6c82b459f4417b2" -dependencies = [ - "cfg-if", - "indoc", - "libc", - "parking_lot", - "pyo3-build-config", - 
"pyo3-ffi", - "pyo3-macros", - "unindent", -] - -[[package]] -name = "pyo3-build-config" -version = "0.16.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "450e2e56cbfa67bbe224cef93312b7a76d81c471d4e0c459d24d4bfaf3d75b53" -dependencies = [ - "once_cell", - "target-lexicon", -] - -[[package]] -name = "pyo3-ffi" -version = "0.16.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "36e653782972eba2fe86e8319ade54b97822c65fb1ccc1e116368372faa6ebc9" -dependencies = [ - "libc", - "pyo3-build-config", -] - -[[package]] -name = "pyo3-macros" -version = "0.16.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "317ce641f29f4e10e75765630bf4d28b2008612226fcc80b27f334fee8184d0f" -dependencies = [ - "proc-macro2", - "pyo3-macros-backend", - "quote", - "syn", -] - -[[package]] -name = "pyo3-macros-backend" -version = "0.16.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "59342fce58a05983688e8d81209d06f67f0fcb1597253ef63b390b2da2417522" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "quote" -version = "1.0.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1feb54ed693b93a84e14094943b84b7c4eae204c512b7ccb95ab0c66d278ad1" -dependencies = [ - "proc-macro2", -] - -[[package]] -name = "redox_syscall" -version = "0.2.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62f25bc4c7e55e0b0b7a1d43fb893f4fa1361d0abe38b9ce4f323c2adfe6ef42" -dependencies = [ - "bitflags", -] - -[[package]] -name = "remove_dir_all" -version = "0.5.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3acd125665422973a33ac9d3dd2df85edad0f4ae9b00dafb1a05e43a9f5ef8e7" -dependencies = [ - "winapi", -] - -[[package]] -name = "reqwest" -version = "0.11.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46a1f7aa4f35e5e8b4160449f51afc758f0ce6454315a9fa7d0d113e958c41eb" -dependencies = [ - "base64", - "bytes", - "encoding_rs", - "futures-core", - "futures-util", - "h2", - "http", - "http-body", - "hyper", - "hyper-tls", - "ipnet", - "js-sys", - "lazy_static", - "log", - "mime", - "native-tls", - "percent-encoding", - "pin-project-lite", - "serde", - "serde_json", - "serde_urlencoded", - "tokio", - "tokio-native-tls", - "url", - "wasm-bindgen", - "wasm-bindgen-futures", - "web-sys", - "winreg", -] - -[[package]] -name = "ryu" -version = "1.0.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "73b4b750c782965c211b42f022f59af1fbceabdd026623714f104152f1ec149f" - -[[package]] -name = "schannel" -version = "0.1.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f05ba609c234e60bee0d547fe94a4c7e9da733d1c962cf6e59efa4cd9c8bc75" -dependencies = [ - "lazy_static", - "winapi", -] - -[[package]] -name = "scopeguard" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" - -[[package]] -name = "security-framework" -version = "2.6.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2dc14f172faf8a0194a3aded622712b0de276821addc574fa54fc0a1167e10dc" -dependencies = [ - "bitflags", - "core-foundation", - "core-foundation-sys", - "libc", - "security-framework-sys", -] - -[[package]] -name = "security-framework-sys" -version = "2.6.1" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "0160a13a177a45bfb43ce71c01580998474f556ad854dcbca936dd2841a5c556" -dependencies = [ - "core-foundation-sys", - "libc", -] - -[[package]] -name = "serde" -version = "1.0.136" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce31e24b01e1e524df96f1c2fdd054405f8d7376249a5110886fb4b658484789" - -[[package]] -name = "serde_derive" -version = "1.0.136" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08597e7152fcd306f41838ed3e37be9eaeed2b61c42e2117266a554fab4662f9" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "serde_json" -version = "1.0.79" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e8d9fa5c3b304765ce1fd9c4c8a3de2c8db365a5b91be52f186efc675681d95" -dependencies = [ - "itoa", - "ryu", - "serde", -] - -[[package]] -name = "serde_urlencoded" -version = "0.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd" -dependencies = [ - "form_urlencoded", - "itoa", - "ryu", - "serde", -] - -[[package]] -name = "serde_yaml" -version = "0.8.23" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4a521f2940385c165a24ee286aa8599633d162077a54bdcae2a6fd5a7bfa7a0" -dependencies = [ - "indexmap", - "ryu", - "serde", - "yaml-rust", -] - -[[package]] -name = "signal-hook-registry" -version = "1.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e51e73328dc4ac0c7ccbda3a494dfa03df1de2f46018127f60c693f2648455b0" -dependencies = [ - "libc", -] - -[[package]] -name = "simple_logger" -version = "2.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c75a9723083573ace81ad0cdfc50b858aa3c366c48636edb4109d73122a0c0ea" -dependencies = [ - "atty", - "colored", - "log", - "time 0.3.9", - "winapi", -] - -[[package]] -name = "slab" -version = "0.4.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb703cfe953bccee95685111adeedb76fabe4e97549a58d16f03ea7b9367bb32" - -[[package]] -name = "smallvec" -version = "1.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2dd574626839106c320a323308629dcb1acfc96e32a8cba364ddc61ac23ee83" - -[[package]] -name = "socket2" -version = "0.4.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "66d72b759436ae32898a2af0a14218dbf55efde3feeb170eb623637db85ee1e0" -dependencies = [ - "libc", - "winapi", -] - -[[package]] -name = "syn" -version = "1.0.91" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b683b2b825c8eef438b77c36a06dc262294da3d5a5813fac20da149241dcd44d" -dependencies = [ - "proc-macro2", - "quote", - "unicode-xid", -] - -[[package]] -name = "target-lexicon" -version = "0.12.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d7fa7e55043acb85fca6b3c01485a2eeb6b69c5d21002e273c79e465f43b7ac1" - -[[package]] -name = "tempfile" -version = "3.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5cdb1ef4eaeeaddc8fbd371e5017057064af0911902ef36b39801f67cc6d79e4" -dependencies = [ - "cfg-if", - "fastrand", - "libc", - "redox_syscall", - "remove_dir_all", - "winapi", -] - -[[package]] -name = "time" -version = "0.1.44" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"6db9e6914ab8b1ae1c260a4ae7a49b6c5611b40328a735b21862567685e73255" -dependencies = [ - "libc", - "wasi 0.10.0+wasi-snapshot-preview1", - "winapi", -] - -[[package]] -name = "time" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c2702e08a7a860f005826c6815dcac101b19b5eb330c27fe4a5928fec1d20ddd" -dependencies = [ - "itoa", - "libc", - "num_threads", - "time-macros", -] - -[[package]] -name = "time-macros" -version = "0.2.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42657b1a6f4d817cda8e7a0ace261fe0cc946cf3a80314390b22cc61ae080792" - -[[package]] -name = "tinyvec" -version = "1.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c1c1d5a42b6245520c249549ec267180beaffcc0615401ac8e31853d4b6d8d2" -dependencies = [ - "tinyvec_macros", -] - -[[package]] -name = "tinyvec_macros" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cda74da7e1a664f795bb1f8a87ec406fb89a02522cf6e50620d016add6dbbf5c" - -[[package]] -name = "tokio" -version = "1.17.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2af73ac49756f3f7c01172e34a23e5d0216f6c32333757c2c61feb2bbff5a5ee" -dependencies = [ - "bytes", - "libc", - "memchr", - "mio", - "num_cpus", - "once_cell", - "parking_lot", - "pin-project-lite", - "signal-hook-registry", - "socket2", - "tokio-macros", - "winapi", -] - -[[package]] -name = "tokio-macros" -version = "1.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b557f72f448c511a979e2564e55d74e6c4432fc96ff4f6241bc6bded342643b7" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "tokio-native-tls" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7d995660bd2b7f8c1568414c1126076c13fbb725c40112dc0120b78eb9b717b" -dependencies = [ - "native-tls", - "tokio", -] - -[[package]] -name = "tokio-stream" -version = "0.1.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50145484efff8818b5ccd256697f36863f587da82cf8b409c53adf1e840798e3" -dependencies = [ - "futures-core", - "pin-project-lite", - "tokio", -] - -[[package]] -name = "tokio-util" -version = "0.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0edfdeb067411dba2044da6d1cb2df793dd35add7888d73c16e3381ded401764" -dependencies = [ - "bytes", - "futures-core", - "futures-sink", - "pin-project-lite", - "tokio", - "tracing", -] - -[[package]] -name = "tower-service" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "360dfd1d6d30e05fda32ace2c8c70e9c0a9da713275777f5a4dbb8a1893930c6" - -[[package]] -name = "tracing" -version = "0.1.34" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d0ecdcb44a79f0fe9844f0c4f33a342cbcbb5117de8001e6ba0dc2351327d09" -dependencies = [ - "cfg-if", - "pin-project-lite", - "tracing-attributes", - "tracing-core", -] - -[[package]] -name = "tracing-attributes" -version = "0.1.20" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2e65ce065b4b5c53e73bb28912318cb8c9e9ad3921f1d669eb0e68b4c8143a2b" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "tracing-core" -version = "0.1.26" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f54c8ca710e81886d498c2fd3331b56c93aa248d49de2222ad2742247c60072f" -dependencies = [ - 
"lazy_static", -] - -[[package]] -name = "try-lock" -version = "0.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "59547bce71d9c38b83d9c0e92b6066c4253371f15005def0c30d9657f50c7642" - -[[package]] -name = "unicode-bidi" -version = "0.3.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a01404663e3db436ed2746d9fefef640d868edae3cceb81c3b8d5732fda678f" - -[[package]] -name = "unicode-normalization" -version = "0.1.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d54590932941a9e9266f0832deed84ebe1bf2e4c9e4a3554d393d18f5e854bf9" -dependencies = [ - "tinyvec", -] - -[[package]] -name = "unicode-xid" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3" - -[[package]] -name = "unindent" -version = "0.1.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "514672a55d7380da379785a4d70ca8386c8883ff7eaae877be4d2081cebe73d8" - -[[package]] -name = "url" -version = "2.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a507c383b2d33b5fc35d1861e77e6b383d158b2da5e14fe51b83dfedf6fd578c" -dependencies = [ - "form_urlencoded", - "idna", - "matches", - "percent-encoding", -] - -[[package]] -name = "vcpkg" -version = "0.2.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" - -[[package]] -name = "want" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ce8a968cb1cd110d136ff8b819a556d6fb6d919363c61534f6860c7eb172ba0" -dependencies = [ - "log", - "try-lock", -] - -[[package]] -name = "wasi" -version = "0.10.0+wasi-snapshot-preview1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a143597ca7c7793eff794def352d41792a93c481eb1042423ff7ff72ba2c31f" - -[[package]] -name = "wasi" -version = "0.11.0+wasi-snapshot-preview1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" - -[[package]] -name = "wasm-bindgen" -version = "0.2.80" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "27370197c907c55e3f1a9fbe26f44e937fe6451368324e009cba39e139dc08ad" -dependencies = [ - "cfg-if", - "wasm-bindgen-macro", -] - -[[package]] -name = "wasm-bindgen-backend" -version = "0.2.80" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53e04185bfa3a779273da532f5025e33398409573f348985af9a1cbf3774d3f4" -dependencies = [ - "bumpalo", - "lazy_static", - "log", - "proc-macro2", - "quote", - "syn", - "wasm-bindgen-shared", -] - -[[package]] -name = "wasm-bindgen-futures" -version = "0.4.30" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f741de44b75e14c35df886aff5f1eb73aa114fa5d4d00dcd37b5e01259bf3b2" -dependencies = [ - "cfg-if", - "js-sys", - "wasm-bindgen", - "web-sys", -] - -[[package]] -name = "wasm-bindgen-macro" -version = "0.2.80" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17cae7ff784d7e83a2fe7611cfe766ecf034111b49deb850a3dc7699c08251f5" -dependencies = [ - "quote", - "wasm-bindgen-macro-support", -] - -[[package]] -name = "wasm-bindgen-macro-support" -version = "0.2.80" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"99ec0dc7a4756fffc231aab1b9f2f578d23cd391390ab27f952ae0c9b3ece20b" -dependencies = [ - "proc-macro2", - "quote", - "syn", - "wasm-bindgen-backend", - "wasm-bindgen-shared", -] - -[[package]] -name = "wasm-bindgen-shared" -version = "0.2.80" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d554b7f530dee5964d9a9468d95c1f8b8acae4f282807e7d27d4b03099a46744" - -[[package]] -name = "web-sys" -version = "0.3.57" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b17e741662c70c8bd24ac5c5b18de314a2c26c32bf8346ee1e6f53de919c283" -dependencies = [ - "js-sys", - "wasm-bindgen", -] - -[[package]] -name = "winapi" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" -dependencies = [ - "winapi-i686-pc-windows-gnu", - "winapi-x86_64-pc-windows-gnu", -] - -[[package]] -name = "winapi-i686-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" - -[[package]] -name = "winapi-x86_64-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" - -[[package]] -name = "windows-sys" -version = "0.34.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5acdd78cb4ba54c0045ac14f62d8f94a03d10047904ae2a40afa1e99d8f70825" -dependencies = [ - "windows_aarch64_msvc", - "windows_i686_gnu", - "windows_i686_msvc", - "windows_x86_64_gnu", - "windows_x86_64_msvc", -] - -[[package]] -name = "windows_aarch64_msvc" -version = "0.34.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17cffbe740121affb56fad0fc0e421804adf0ae00891205213b5cecd30db881d" - -[[package]] -name = "windows_i686_gnu" -version = "0.34.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2564fde759adb79129d9b4f54be42b32c89970c18ebf93124ca8870a498688ed" - -[[package]] -name = "windows_i686_msvc" -version = "0.34.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9cd9d32ba70453522332c14d38814bceeb747d80b3958676007acadd7e166956" - -[[package]] -name = "windows_x86_64_gnu" -version = "0.34.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cfce6deae227ee8d356d19effc141a509cc503dfd1f850622ec4b0f84428e1f4" - -[[package]] -name = "windows_x86_64_msvc" -version = "0.34.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d19538ccc21819d01deaf88d6a17eae6596a12e9aafdbb97916fb49896d89de9" - -[[package]] -name = "winreg" -version = "0.10.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "80d0f4e272c85def139476380b12f9ac60926689dd2e01d4923222f40580869d" -dependencies = [ - "winapi", -] - -[[package]] -name = "yaml-rust" -version = "0.4.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56c1936c4cc7a1c9ab21a1ebb602eb942ba868cbd44a99cb7cdc5892335e1c85" -dependencies = [ - "linked-hash-map", -] diff --git a/Source/RustEngine/Cargo.toml b/Source/RustEngine/Cargo.toml deleted file mode 100644 index 7296b12..0000000 --- a/Source/RustEngine/Cargo.toml +++ /dev/null @@ -1,30 +0,0 @@ -[package] -name = "alc" -version = "0.1.0" -edition = "2021" - -[lib] -# The name of the native library. 
This is the name which will be used in Python to import the -# library (i.e. `import string_sum`). If you change this, you must also change the name of the -# `#[pymodule]` in `src/lib.rs`. -name = "alc" -# "cdylib" is necessary to produce a shared library for Python to import from. -# -# Downstream Rust code (including code in `bin/`, `examples/`, and `tests/`) will not be able -# to `use string_sum;` unless the "rlib" or "lib" crate type is also included, e.g.: -# crate-type = ["cdylib", "rlib"] -crate-type = ["cdylib"] - -[dependencies] -pyo3 = { version = "0.16.4", features = ["extension-module"] } -log = "0.4.16" -simple_logger = "2.1.0" -chrono = "0.4.19" -futures = "0.3.21" -reqwest = {version = "0.11.10", features = ["blocking", "json"]} -tokio = {version="1.17.0", features=["full"]} -tokio-stream = "0.1.8" -serde="1.0.136" -serde_yaml = "0.8.23" -serde_json="1.0.79" -serde_derive = "1.0.136" \ No newline at end of file diff --git a/Source/RustEngine/src/api_connection.rs b/Source/RustEngine/src/api_connection.rs deleted file mode 100644 index 8f77aa6..0000000 --- a/Source/RustEngine/src/api_connection.rs +++ /dev/null @@ -1,283 +0,0 @@ -use std::collections::HashMap; -use reqwest; -use log::{debug, info, warn, error}; -use reqwest::header::{AUTHORIZATION, CONTENT_TYPE, HeaderMap}; -use tokio; -use serde_json; -use chrono::{DateTime}; -use futures::{SinkExt, StreamExt}; -use futures::channel::mpsc::{Receiver, Sender}; -use crate::data_structures::{JsonList, StatusMessage, GetBlobConfig, GetContentConfig, - AuthResult, ContentToRetrieve}; - - -/// Return a logged in API connection object. Use the Headers value to make API requests. -pub fn get_api_connection(tenant_id: String, client_id: String, secret_key: String, - publisher_id: String) -> ApiConnection { - - let mut api = ApiConnection { - tenant_id, - client_id, - secret_key, - publisher_id, - headers: HeaderMap::new(), - }; - api.login(); - api -} -/// Abstraction of an API connection to Azure Management APIs. Can be used to login to the API -/// which sets the headers. These headers can then be used to make authenticated requests. -pub struct ApiConnection { - pub tenant_id: String, - pub client_id: String, - secret_key: String, - pub publisher_id: String, - pub headers: HeaderMap, -} -impl ApiConnection { - /// Use tenant_id, client_id and secret_key to request a bearer token and store it in - /// our headers. Must be called once before requesting any content. - fn login(&mut self) { - let auth_url = - format!("https://login.microsoftonline.com/{}/oauth2/token", - self.tenant_id.to_string()); - let resource = "https://manage.office.com"; - let params = [("grant_type", "client_credentials"), ("client_id", &self.client_id), - ("client_secret", &self.secret_key), ("resource", &resource)]; - self.headers.insert(CONTENT_TYPE, - "application/x-www-form-urlencoded".parse().unwrap()); - let login_client = reqwest::blocking::Client::new(); - let json: AuthResult = login_client - .post(auth_url) - .headers(self.headers.clone()) - .form(¶ms) - .send() - .unwrap() - .json() - .unwrap(); - let token = format!("bearer {}", json.access_token); - self.headers.insert(AUTHORIZATION, token.parse().unwrap()); - } -} - - -/// Create a URL that can retrieve the first page of content for each passed content type. Each -/// content type can have multiple runs specified. A run consists of a start- and end date to -/// retrieve data for. Max. time span is 24, so if the user wants to retrieve for e.g. 72 hours, -/// we need 3 runs of 24 hours each. 
The runs object looks like e.g.: -/// Runs{Audit.Exchange: [(start_date, end_date), (start_date, end_date), (start_date, end_date)} -pub fn create_base_urls( - content_types: Vec, tenant_id: String, publisher_id: String, - runs: HashMap>) -> Vec<(String, String)> { - let mut urls_to_get: Vec<(String, String)> = Vec::new(); - for content_type in content_types.iter() { - let content_runs = runs.get(content_type).unwrap(); - for content_run in content_runs.into_iter() { - let (start_time, end_time) = content_run; - urls_to_get.push( - (content_type.to_string(), - format!("https://manage.office.com/api/v1.0/{}/activity/feed/\ - subscriptions/content?contentType={}&startTime={}&endTime={}\ - &PublisherIdentifier={}", - tenant_id, content_type, start_time, end_time, publisher_id) - )); - } - } - urls_to_get -} - - -/// Get available content blobs to retrieve. A base URL receices the initial page of content blobs. -/// The response header could specify 'NextPageUri', which if it exists specifies the URL for the -/// next page of content. This is sent over the blobs_tx channel to retrieve as well. If no -/// additional pages exist, a status message is sent to indicate all content blobs for this -/// content type have been retrieved. -#[tokio::main(flavor="multi_thread", worker_threads=200)] -pub async fn get_content_blobs(config: GetBlobConfig, blobs_rx: Receiver<(String, String)>) { - blobs_rx.for_each_concurrent(config.threads, |(content_type, url)| { - let blobs_tx = config.blobs_tx.clone(); - let blob_error_tx = config.blob_error_tx.clone(); - let status_tx = config.status_tx.clone(); - let content_tx = config.content_tx.clone(); - let client = config.client.clone(); - let headers = config.headers.clone(); - let content_type = content_type.clone(); - let url = url.clone(); - async move { - match client.get(url.clone()).timeout(std::time::Duration::from_secs(5)). - headers(headers.clone()).send().await { - Ok(resp) => { - handle_blob_response(resp, blobs_tx, status_tx, content_tx, blob_error_tx, - content_type, url).await; - }, - Err(e) => { - error!("Err getting blob response {}", e); - handle_blob_response_error(status_tx, blob_error_tx, content_type, url).await; - } - } - } - }).await; - debug!("Exit blob thread"); -} - - -/// Deal with the response of a successful content blob request. Try to decode into JSON to -/// retrieve the content URIs of the content inside the blob. Also check response header for another -/// page of content blobs. -async fn handle_blob_response( - resp: reqwest::Response, blobs_tx: Sender<(String, String)>, - mut status_tx: Sender, content_tx: Sender, - mut blob_error_tx: Sender<(String, String)>, content_type: String, url: String) { - - handle_blob_response_paging(&resp, blobs_tx, status_tx.clone(), - content_type.clone()).await; - match resp.json::>>().await { - Ok(i) => { - handle_blob_response_content_uris(status_tx, content_tx, content_type, i) - .await; - }, - Err(e) => { - warn!("Err getting blob JSON {}", e); - match blob_error_tx.send((content_type, url)).await { - Err(e) => { - error!("Could not resend failed blob, dropping it: {}", e); - status_tx.send(StatusMessage::ErrorContentBlob).await.unwrap(); - }, - _=> (), - } - } - } -} - - -/// Determine if a content blob response header contains a reference to another page of blobs. 
-async fn handle_blob_response_paging( - resp: &reqwest::Response, mut blobs_tx: Sender<(String, String)>, - mut status_tx: Sender, content_type: String) { - - let next_or_not = resp.headers().get("NextPageUri"); - match next_or_not { - Some(i) => { - let new_url = i.to_str().unwrap().to_string(); - blobs_tx.send((content_type.clone(), new_url)).await.unwrap(); - }, - None => { - status_tx. - send(StatusMessage::FinishedContentBlobs).await.unwrap(); - } - }; -} - - -/// Deal with successfully received and decoded content blobs. Send the URIs of content to retrieve -/// over the content_tx channel for the content thread to retrieve. -async fn handle_blob_response_content_uris( - mut status_tx: Sender, mut content_tx: Sender, - content_type: String, content_json: JsonList) { - - for json_dict in content_json.into_iter() { - if json_dict.contains_key("contentUri") == false { - warn!("Invalid blob!: {:?}", json_dict); - } else { - let url = json_dict - .get("contentUri").unwrap() - .to_string() - .strip_prefix('"').unwrap().strip_suffix('"').unwrap() - .to_string(); - let expiration = json_dict.get("contentExpiration").unwrap() - .to_string() - .strip_prefix('"').unwrap().strip_suffix('"').unwrap() - .to_string(); - let content_id = json_dict.get("contentId").unwrap() - .to_string() - .strip_prefix('"').unwrap().strip_suffix('"').unwrap() - .to_string(); - let content_to_retrieve = ContentToRetrieve { - expiration, content_type: content_type.clone(), content_id, url}; - - content_tx.send(content_to_retrieve).await.unwrap(); - status_tx.send(StatusMessage::FoundNewContentBlob).await.unwrap(); - } - }; -} - -/// Deal with error while requesting a content blob. -async fn handle_blob_response_error( - mut status_tx: Sender, mut blob_error_tx: Sender<(String, String)>, - content_type: String, url: String) { - - match blob_error_tx.send((content_type, url)).await { - Err(e) => { - error!("Could not resend failed blob, dropping it: {}", e); - status_tx.send(StatusMessage::ErrorContentBlob).await.unwrap(); - }, - _=> (), - } -} - - -/// Retrieve the actual ContentUris found in the JSON body of content blobs. -#[tokio::main(flavor="multi_thread", worker_threads=200)] -pub async fn get_content(config: GetContentConfig, content_rx: Receiver) { - content_rx.for_each_concurrent(config.threads, |content_to_retrieve| { - let client = config.client.clone(); - let headers = config.headers.clone(); - let result_tx = config.result_tx.clone(); - let status_tx = config.status_tx.clone(); - let content_error_tx = config.content_error_tx.clone(); - async move { - match client.get(content_to_retrieve.url.clone()) - .timeout(std::time::Duration::from_secs(5)).headers(headers).send().await { - Ok(resp) => { - handle_content_response(resp, result_tx, status_tx, content_error_tx, - content_to_retrieve).await; - }, - Err(_) => { - handle_content_response_error(status_tx, content_error_tx, content_to_retrieve) - .await; - } - } - } - }).await; - debug!("Exit content thread"); -} - - -/// Deal with successful content request response. 
-async fn handle_content_response( - resp: reqwest::Response, result_tx: std::sync::mpsc::Sender<(String, ContentToRetrieve)>, - mut status_tx: Sender, mut content_error_tx: Sender, - content_to_retrieve: ContentToRetrieve) { - - match resp.text().await { - Ok(json) => { - result_tx.send((json, content_to_retrieve)).unwrap(); - status_tx.send(StatusMessage::RetrievedContentBlob).await.unwrap(); - } - Err(e) => { - warn!("Error interpreting JSON: {}", e); - match content_error_tx.send(content_to_retrieve).await { - Err(e) => { - error!("Could not resend failed content, dropping it: {}", e); - status_tx.send(StatusMessage::ErrorContentBlob).await.unwrap(); - }, - _=> (), - } - } - } -} - - -/// Deal with error response requesting a contentURI. -async fn handle_content_response_error( - mut status_tx: Sender, mut content_error_tx: Sender, - content_to_retrieve: ContentToRetrieve) { - - match content_error_tx.send(content_to_retrieve).await { - Err(e) => { - error!("Could not resend failed content, dropping it: {}", e); - status_tx.send(StatusMessage::ErrorContentBlob).await.unwrap(); - }, - _=> (), - } -} diff --git a/Source/RustEngine/src/data_structures.rs b/Source/RustEngine/src/data_structures.rs deleted file mode 100644 index 250f1cf..0000000 --- a/Source/RustEngine/src/data_structures.rs +++ /dev/null @@ -1,90 +0,0 @@ -use futures::channel::mpsc::{Sender, Receiver}; -use std::collections::HashMap; -use reqwest::header::HeaderMap; -use serde_derive::{Deserialize}; - -/// List of JSON responses (used to represent content blobs) -pub type JsonList = Vec>; - - -/// Representation of Office API json response after sending an auth request. We need the bearer -/// token. -#[derive(Deserialize, Debug)] -pub struct AuthResult { - pub access_token: String, -} - - -/// Representation of content we need to retrieve. ID, expiration and content type are passed to -/// python along with the retrieved content. ID an expiration are needed for avoiding known logs, -/// content type for categorization in outputs. -pub struct ContentToRetrieve { - pub content_type: String, - pub content_id: String, - pub expiration: String, - pub url: String -} - -/// Messages for status channel between main threads and the blob/content retrieving threads. -/// Mainly used to keep track of which content still needs retrieving and which is finished, which -/// is necessary for knowing when to terminate. -pub enum StatusMessage { - BeingThrottled, - FinishedContentBlobs, // Finished getting all content blobs for e.g. Audit.Exchange - FoundNewContentBlob, // Found a new blob to retrieved - RetrievedContentBlob, // Finished retrieving a new blob - ErrorContentBlob, // Could not retrieve a blob -} - -/// Used by thread getting content blobs -pub struct GetBlobConfig { - pub client: reqwest::Client, - pub headers: HeaderMap, - pub status_tx: Sender, - pub blobs_tx: Sender<(String, String)>, - pub blob_error_tx: Sender<(String, String)>, - pub content_tx: Sender, - pub threads: usize, -} - - -/// Used by thread getting content -pub struct GetContentConfig { - pub client: reqwest::Client, - pub headers: HeaderMap, - pub result_tx: std::sync::mpsc::Sender<(String, ContentToRetrieve)>, - pub content_error_tx: Sender, - pub status_tx: Sender, - pub threads: usize, -} - - -/// Used by message loop keeping track of progress and terminating other threads when they are -/// finished. 
-pub struct MessageLoopConfig { - pub status_rx: Receiver, - pub stats_tx: std::sync::mpsc::Sender<(usize, usize, usize, usize)>, - pub blobs_tx: Sender<(String, String)>, - pub blob_error_rx: Receiver<(String, String)>, - pub content_tx: Sender, - pub content_error_rx: Receiver, - pub urls: Vec<(String, String)>, - pub content_types: Vec, - pub retries: usize, -} - - -/// These stats are passed back to python after a run has finished to show to end-user. -pub struct RunStatistics { - pub blobs_found: usize, - pub blobs_successful: usize, - pub blobs_error: usize, - pub blobs_retried: usize, -} -impl RunStatistics { - pub fn new() -> RunStatistics { - RunStatistics { - blobs_found: 0, blobs_successful: 0, blobs_error: 0, blobs_retried: 0 - } - } -} diff --git a/Source/RustEngine/src/lib.rs b/Source/RustEngine/src/lib.rs deleted file mode 100644 index 4cb6992..0000000 --- a/Source/RustEngine/src/lib.rs +++ /dev/null @@ -1,312 +0,0 @@ -use std::thread; -use std::collections::HashMap; -use log::{debug, info, warn, error}; -use futures::{SinkExt}; -use futures::channel::mpsc::channel; -use futures::channel::mpsc::{Sender, Receiver}; -use pyo3::prelude::*; -use crate::data_structures::{ContentToRetrieve, RunStatistics}; - - -mod api_connection; -mod data_structures; - - -#[pyclass] -/// # Rust Engine -/// A class instantiated in Python. Python will call the run_once method below, which will start -/// three background threads responsible for retrieving content. Python will then call -/// the get_result method on a loop to drain the results from the results channel until it is -/// disconnected. The three background threads are: -/// - blob_thread: find content blobs and send results to content channel -/// - content_thread: retrieve content blobs from content channel, send results to results channel -/// - message_loop_thread: keep track of progress, terminate after all content is retrieved -pub struct RustEngine { - tenant_id: String, - client_id: String, - secret_key: String, - publisher_id: String, - content_types: Vec, - runs: HashMap>, - result_rx: Option>, - stats_rx: Option>, - threads: usize, - retries: usize, -} - -#[pymethods] -impl RustEngine { - - #[new] - pub fn new(tenant_id: String, client_id: String, secret_key:String, publisher_id: String, - content_types: Vec, runs: HashMap>, - threads: usize, retries: usize) - -> RustEngine { - RustEngine { - result_rx: None, - stats_rx: None, - tenant_id, - client_id, - secret_key, - publisher_id, - content_types, - runs, - threads, - retries, - } - } - - /// Non-blocking. Call once to start retrieving logs, which will arrive in the results_rx - /// receiver. Call get_results iteratively to drain the results channel. - pub fn run_once(&mut self) { - let api = api_connection::get_api_connection( - self.tenant_id.clone(), self.client_id.clone(), - self.secret_key.clone(), self.publisher_id.clone()); - let (result_rx, stats_rx) = get_available_content( - api, self.content_types.clone(), self.runs.clone(), self.threads, - self.retries); - self.result_rx = Some(result_rx); - self.stats_rx = Some(stats_rx); - } - - /// ValueError means nothing in the channel right now, but more will come. EOFError means - /// all results received, no more will come. Message loop closes the results channel when - /// all content has been retrieved. 
- pub fn get_result(&self) -> PyResult<(String, String, String, String)> { - match self.result_rx.as_ref().unwrap().try_recv() { - Ok((i,j) ) => { - Ok((i, j.content_id, j.expiration, j.content_type)) - }, - Err(std::sync::mpsc::TryRecvError::Empty) => { - Err(pyo3::exceptions::PyValueError::new_err("No logs ready")) - }, - Err(std::sync::mpsc::TryRecvError::Disconnected) => { - Err(pyo3::exceptions::PyEOFError::new_err("Finished run")) - } - } - } - - /// Receive the run results. This can only happen when the message_loop thread had exited its' - /// loop, so if we return the results we know the engine has stopped. - pub fn stop(&self) -> PyResult<(usize, usize, usize, usize)> { - Ok(self.stats_rx.as_ref().unwrap().try_recv().unwrap()) - } -} - - -/// Initialize a config object for each sub thread to run -/// - Blob thread: Collect available content blobs -/// - Content thread: Collect the blobs found by blob thread -/// - Message loop: Communicates with other threads to handle retries and terminate when finished -fn initialize_configs( - api: api_connection::ApiConnection, content_types: Vec, - runs: HashMap>, retries: usize, threads:usize) - -> (data_structures::GetBlobConfig, data_structures::GetContentConfig, - data_structures::MessageLoopConfig, Receiver<(String, String)>, Receiver, - std::sync::mpsc::Receiver<(String, ContentToRetrieve)>, - std::sync::mpsc::Receiver<(usize, usize, usize, usize)>) { - - let urls = api_connection::create_base_urls( - content_types.clone(), api.tenant_id, api.publisher_id, runs); - - // Create channels to communicate with async closures - let (status_tx, status_rx): - (Sender, Receiver) = - channel(100000); - let (blobs_tx, blobs_rx): (Sender<(String, String)>, Receiver<(String, String)>) = - channel(100000); - let (blob_error_tx, blob_error_rx): - (Sender<(String, String)>, Receiver<(String, String)>) = channel(100000); - let (content_tx, content_rx): (Sender, Receiver) = - channel(100000); - let (content_error_tx, content_error_rx): - (Sender, Receiver) = channel(100000000); - let (result_tx, result_rx): - (std::sync::mpsc::Sender<(String, ContentToRetrieve)>, - std::sync::mpsc::Receiver<(String, ContentToRetrieve)>) = - std::sync::mpsc::channel(); - let (stats_tx, stats_rx): - (std::sync::mpsc::Sender<(usize, usize, usize, usize)>, - std::sync::mpsc::Receiver<(usize, usize, usize, usize)>) = std::sync::mpsc::channel(); - - let blob_config = data_structures::GetBlobConfig { client: reqwest::Client::new(), headers: api.headers.clone(), - status_tx: status_tx.clone(), blobs_tx: blobs_tx.clone(), - blob_error_tx: blob_error_tx.clone(), content_tx: content_tx.clone(), threads - }; - - let content_config = data_structures::GetContentConfig { - client: reqwest::Client::new(), headers: api.headers.clone(), result_tx: result_tx.clone(), - content_error_tx: content_error_tx.clone(), status_tx: status_tx.clone(), threads - }; - - let message_loop_config = data_structures::MessageLoopConfig { - content_tx: content_tx.clone(), blobs_tx: blobs_tx.clone(), stats_tx: stats_tx.clone(), - urls, content_error_rx, status_rx, blob_error_rx, content_types, retries}; - return (blob_config, content_config, message_loop_config, blobs_rx, content_rx, result_rx, stats_rx) -} - - -/// Get all the available log content for a list of content types and runs (start- and end times -/// of content to receive). 
-fn get_available_content(api: api_connection::ApiConnection, content_types: Vec, - runs: HashMap>, threads: usize, - retries: usize) - -> (std::sync::mpsc::Receiver<(String, ContentToRetrieve)>, - std::sync::mpsc::Receiver<(usize, usize, usize, usize)>) { - - let (blob_config, content_config, message_loop_config, - blobs_rx, content_rx, result_rx, stats_rx) - = initialize_configs(api, content_types, runs, retries, threads); - spawn_blob_collector(blob_config, content_config, message_loop_config, blobs_rx, content_rx); - (result_rx, stats_rx) -} - -/// Spawn threads running the actual collectors, and a message loop thread to keep track of -/// progress and terminate once finished. -fn spawn_blob_collector( - blob_config: data_structures::GetBlobConfig, content_config: data_structures::GetContentConfig, - message_loop_config: data_structures::MessageLoopConfig, blobs_rx: Receiver<(String, String)>, - content_rx: Receiver<(ContentToRetrieve)>) { - - thread::spawn( move || {api_connection::get_content_blobs(blob_config, blobs_rx);}); - thread::spawn( move || {api_connection::get_content(content_config, content_rx);}); - thread::spawn(move || {message_loop(message_loop_config)}); -} - -/// Receive status updates to keep track of when all content has been retrieved. Also handle -/// retrying any failed content or dropping it after too many retries. Every time content is foudn -/// awaiting_content_blobs is incremented; every time content is retrieved or could not be -/// retrieved awaiting_content_blobs is decremented. When it reaches 0 we know we are done. -#[tokio::main] -pub async fn message_loop(mut config: data_structures::MessageLoopConfig) { - - // Send base URLS for content blob retrieval then keep track of when they've all come in - let mut awaiting_content_types:usize = 0; - for (content_type, base_url) in config.urls.into_iter() { - config.blobs_tx.clone().send((content_type, base_url)).await.unwrap(); - awaiting_content_types += 1; - } - // Keep track of found and retrieved content blobs - let mut awaiting_content_blobs: usize = 0; - // Keep track of retry count for failed blobs - let mut retry_map :HashMap = HashMap::new(); - // Keep stats to return to python after run finishes - let mut stats = RunStatistics::new(); - // Loop ends with the run itself, signalling the program is done. - loop { - // Receive status message indicated found content and retrieved content. When all blobs have - // been found, and all found blobs have been retrieved, we are done. - match config.status_rx.try_next() { - Ok(Some(msg)) => { - match msg { - // awaiting_content_types is initially the size of content type * runs for each - // content type. When retrieving pages if we don't get a NextPageUri response - // header, we know we have found all possible blobs for that content type and - // we decrement awaiting_content_types. When it hits 0 we know we found all - // content that can possible be retrieved. - data_structures::StatusMessage::FinishedContentBlobs => { - if awaiting_content_types > 0 { - awaiting_content_types -= 1; - } - }, - // We have found a new content blob while iterating through the pages of them. - // It has been queued up to be retrieved. - data_structures::StatusMessage::FoundNewContentBlob => { - awaiting_content_blobs +=1; - stats.blobs_found += 1; - }, - // A queued up content blob has actually been retrieved so we are done with it. - // When awaiting_content_blobs hits 0 we are done retrieving all actual content - // and we can exit. 
- data_structures::StatusMessage::RetrievedContentBlob => { - awaiting_content_blobs -= 1; - stats.blobs_successful += 1; - if awaiting_content_types == 0 && awaiting_content_blobs == 0 { - config.content_tx.close_channel(); - break; - } - }, - // A queued up content blob could not be retrieved so we are done with it. - // When awaiting_content_blobs hits 0 we are done retrieving all actual content - // and we can exit. - data_structures::StatusMessage::ErrorContentBlob => { - awaiting_content_blobs -= 1; - stats.blobs_error += 1; - if awaiting_content_types == 0 && awaiting_content_blobs == 0 { - config.content_tx.close_channel(); - break; - } - } - data_structures::StatusMessage::BeingThrottled => warn!("Throttled!"), // TODO: handle being throttled - } - }, - _ => () - } - // Check channel for content pages that could not be retrieved and retry them the user - // defined amount of times. If we can't in that amount of times then give up. - match config.blob_error_rx.try_next() { - Ok(Some((content_type, url))) => { - if retry_map.contains_key(&url) == true { - let retries_left = retry_map.get_mut(&url).unwrap(); - if retries_left == &mut 0 { - error!("Gave up on blob {}", url); - awaiting_content_types -= 1; - stats.blobs_error += 1; - } else { - *retries_left -= 1; - stats.blobs_retried += 1; - warn!("Retry blob {} {}", retries_left, url); - config.blobs_tx.send((content_type, url)).await.unwrap(); - - } - } else { - retry_map.insert(url.clone(), config.retries - 1); - stats.blobs_retried += 1; - warn!("Retry blob {} {}", config.retries - 1, url); - config.blobs_tx.send((content_type, url)).await.unwrap(); - } - }, - _ => (), - }; - // Check channel for content blobs that could not be retrieved and retry them the user - // defined amount of times. If we can't in that amount of times then give up. - match config.content_error_rx.try_next() { - Ok(Some(content)) => { - if retry_map.contains_key(&content.url) == true { - let retries_left = retry_map.get_mut(&content.url).unwrap(); - if retries_left == &mut 0 { - error!("Gave up on content {}", content.url); - awaiting_content_blobs -= 1; - stats.blobs_error += 1; - } else { - *retries_left -= 1; - stats.blobs_retried += 1; - warn!("Retry content {} {}", retries_left, content.url); - config.content_tx.send(content).await.unwrap(); - - } - } else { - retry_map.insert(content.url.to_string(), config.retries - 1); - stats.blobs_retried += 1; - warn!("Retry content {} {}", config.retries - 1, content.url); - config.content_tx.send(content).await.unwrap(); - } - } - _ => (), - } - /* - print!("{esc}[2J{esc}[1;1H", esc = 27 as char); - println!{"Pending content types: {}, Pending content blobs: {}", - awaiting_content_types, awaiting_content_blobs} - */ - } - // We send back stats after exiting the loop, signalling the end of the run. 
- config.stats_tx.send((stats.blobs_found, stats.blobs_successful, stats.blobs_retried, - stats.blobs_error)).unwrap(); -} - -#[pymodule] -fn alc(_py: Python<'_>, m: &PyModule) -> PyResult<()> { - m.add_class::()?; - Ok(()) -} diff --git a/Source/icon.ico b/Source/icon.ico deleted file mode 100644 index 7fadbf9..0000000 Binary files a/Source/icon.ico and /dev/null differ diff --git a/src/api_connection.rs b/src/api_connection.rs index 8801b99..a7c0e7f 100644 --- a/src/api_connection.rs +++ b/src/api_connection.rs @@ -1,92 +1,120 @@ use std::collections::HashMap; use reqwest; -use log::{debug, info, warn, error}; +use log::{debug, warn, error}; use reqwest::header::{AUTHORIZATION, CONTENT_TYPE, HeaderMap}; use tokio; use serde_json; -use chrono::{DateTime}; use futures::{SinkExt, StreamExt}; use futures::channel::mpsc::{Receiver, Sender}; -use crate::config::ContentTypesSubConfig; -use crate::data_structures::{JsonList, StatusMessage, GetBlobConfig, GetContentConfig, - AuthResult, ContentToRetrieve}; +use crate::config::Config; +use crate::data_structures::{JsonList, StatusMessage, GetBlobConfig, GetContentConfig, AuthResult, + ContentToRetrieve, CliArgs}; /// Return a logged in API connection object. Use the Headers value to make API requests. -pub fn get_api_connection(tenant_id: String, client_id: String, secret_key: String, - publisher_id: String) -> ApiConnection { +pub fn get_api_connection(args: CliArgs, config: Config) -> ApiConnection { let mut api = ApiConnection { - tenant_id, - client_id, - secret_key, - publisher_id, + args, + config, headers: HeaderMap::new(), }; api.login(); api } + + /// Abstraction of an API connection to Azure Management APIs. Can be used to login to the API /// which sets the headers. These headers can then be used to make authenticated requests. pub struct ApiConnection { - pub tenant_id: String, - pub client_id: String, - secret_key: String, - pub publisher_id: String, + pub args: CliArgs, + pub config: Config, pub headers: HeaderMap, } impl ApiConnection { /// Use tenant_id, client_id and secret_key to request a bearer token and store it in /// our headers. Must be called once before requesting any content. 
fn login(&mut self) { - let auth_url = - format!("https://login.microsoftonline.com/{}/oauth2/token", - self.tenant_id.to_string()); + let auth_url = format!("https://login.microsoftonline.com/{}/oauth2/token", + self.args.tenant_id.to_string()); + let resource = "https://manage.office.com"; - let params = [("grant_type", "client_credentials"), ("client_id", &self.client_id), - ("client_secret", &self.secret_key), ("resource", &resource)]; - self.headers.insert(CONTENT_TYPE, - "application/x-www-form-urlencoded".parse().unwrap()); + + let params = [ + ("grant_type", "client_credentials"), + ("client_id", &self.args.client_id), + ("client_secret", &self.args.secret_key), + ("resource", &resource)]; + + self.headers.insert(CONTENT_TYPE, "application/x-www-form-urlencoded".parse().unwrap()); + let login_client = reqwest::blocking::Client::new(); let json: AuthResult = login_client .post(auth_url) .headers(self.headers.clone()) .form(¶ms) .send() - .unwrap() + .unwrap_or_else(|e| panic!("Could not send API login request: {}", e)) .json() - .unwrap(); + .unwrap_or_else(|e| panic!("Could not parse API login reply: {}", e)); + let token = format!("bearer {}", json.access_token); self.headers.insert(AUTHORIZATION, token.parse().unwrap()); } -} + fn get_base_url(&self) -> String { + format!("https://manage.office.com/api/v1.0/{}/activity/feed", self.args.tenant_id) + } -/// Create a URL that can retrieve the first page of content for each passed content type. Each -/// content type can have multiple runs specified. A run consists of a start- and end date to -/// retrieve data for. Max. time span is 24, so if the user wants to retrieve for e.g. 72 hours, -/// we need 3 runs of 24 hours each. The runs object looks like e.g.: -/// Runs{Audit.Exchange: [(start_date, end_date), (start_date, end_date), (start_date, end_date)} -pub fn create_base_urls( - content_types: ContentTypesSubConfig, tenant_id: String, publisher_id: String, - runs: HashMap>) -> Vec<(String, String)> { - - let mut urls_to_get: Vec<(String, String)> = Vec::new(); - let content_to_get = content_types.get_content_type_strings(); - for content_type in content_to_get { - let content_runs = runs.get(&content_type).unwrap(); - for content_run in content_runs.into_iter() { - let (start_time, end_time) = content_run; - urls_to_get.push( - (content_type.to_string(), - format!("https://manage.office.com/api/v1.0/{}/activity/feed/\ - subscriptions/content?contentType={}&startTime={}&endTime={}\ - &PublisherIdentifier={}", - tenant_id, content_type, start_time, end_time, publisher_id) - )); + pub fn subscribe_to_feeds(&self) { + + let content_types = self.config.collect.content_types.get_content_type_strings(); + + let client = reqwest::blocking::Client::new(); + for content_type in content_types { + let url = format!("{}/subscriptions/start?contentType={}", + self.get_base_url(), + content_type + ); + client + .post(url) + .headers(self.headers.clone()) + .header("content-length", 0) + .send() + .unwrap_or_else( + |e| panic!("Error setting feed subscription status {}", e) + ); + } + } + + + /// Create a URL that can retrieve the first page of content for each passed content type. Each + /// content type can have multiple runs specified. A run consists of a start- and end date to + /// retrieve data for. Max. time span is 24, so if the user wants to retrieve for e.g. 72 hours, + /// we need 3 runs of 24 hours each. 
The runs object looks like e.g.: + /// Runs{Audit.Exchange: [(start_date, end_date), (start_date, end_date), (start_date, end_date)} + pub fn create_base_urls(&self, runs: HashMap>) -> Vec<(String, String)> { + + let mut urls_to_get: Vec<(String, String)> = Vec::new(); + let content_to_get = self.config.collect.content_types.get_content_type_strings(); + for content_type in content_to_get { + let content_runs = runs.get(&content_type).unwrap(); + for content_run in content_runs.into_iter() { + let (start_time, end_time) = content_run; + urls_to_get.push( + (content_type.to_string(), + format!("{}/subscriptions/content?contentType={}&startTime={}&endTime={}\ + &PublisherIdentifier={}", + self.get_base_url(), + content_type, + start_time, + end_time, + self.args.publisher_id) + )); + } } + urls_to_get } - urls_to_get } @@ -149,7 +177,9 @@ async fn handle_blob_response( match blob_error_tx.send((content_type, url)).await { Err(e) => { error!("Could not resend failed blob, dropping it: {}", e); - status_tx.send(StatusMessage::ErrorContentBlob).await.unwrap(); + status_tx.send(StatusMessage::ErrorContentBlob).await.unwrap_or_else( + |e| panic!("Could not send status update, channel closed?: {}", e) + ); }, _=> (), } @@ -167,11 +197,15 @@ async fn handle_blob_response_paging( match next_or_not { Some(i) => { let new_url = i.to_str().unwrap().to_string(); - blobs_tx.send((content_type.clone(), new_url)).await.unwrap(); + blobs_tx.send((content_type.clone(), new_url)).await.unwrap_or_else( + |e| panic!("Could not send found blob, channel closed?: {}", e) + ); }, None => { status_tx. - send(StatusMessage::FinishedContentBlobs).await.unwrap(); + send(StatusMessage::FinishedContentBlobs).await.unwrap_or_else( + |e| panic!("Could not send status update, channel closed?: {}", e) + ); } }; } @@ -206,8 +240,12 @@ async fn handle_blob_response_content_uris( let content_to_retrieve = ContentToRetrieve { expiration, content_type: content_type.clone(), content_id, url}; - content_tx.send(content_to_retrieve).await.unwrap(); - status_tx.send(StatusMessage::FoundNewContentBlob).await.unwrap(); + content_tx.send(content_to_retrieve).await.unwrap_or_else( + |e| panic!("Could not send found content, channel closed?: {}", e) + ); + status_tx.send(StatusMessage::FoundNewContentBlob).await.unwrap_or_else( + |e| panic!("Could not send status update, channel closed?: {}", e) + ); } }; } @@ -220,7 +258,9 @@ async fn handle_blob_response_error( match blob_error_tx.send((content_type, url)).await { Err(e) => { error!("Could not resend failed blob, dropping it: {}", e); - status_tx.send(StatusMessage::ErrorContentBlob).await.unwrap(); + status_tx.send(StatusMessage::ErrorContentBlob).await.unwrap_or_else( + |e| panic!("Could not send status update, channel closed?: {}", e) + ); }, _=> (), } @@ -262,7 +302,9 @@ async fn handle_content_response( match resp.text().await { Ok(json) => { - result_tx.send((json, content_to_retrieve)).await.unwrap(); + result_tx.send((json, content_to_retrieve)).await.unwrap_or_else( + |e| panic!("Could not send status update, channel closed?: {}", e) + ); status_tx.send(StatusMessage::RetrievedContentBlob).await.unwrap(); } Err(e) => { @@ -270,7 +312,9 @@ async fn handle_content_response( match content_error_tx.send(content_to_retrieve).await { Err(e) => { error!("Could not resend failed content, dropping it: {}", e); - status_tx.send(StatusMessage::ErrorContentBlob).await.unwrap(); + status_tx.send(StatusMessage::ErrorContentBlob).await.unwrap_or_else( + |e| panic!("Could not send status 
update, channel closed?: {}", e) + ); }, _=> (), } @@ -287,7 +331,9 @@ async fn handle_content_response_error( match content_error_tx.send(content_to_retrieve).await { Err(e) => { error!("Could not resend failed content, dropping it: {}", e); - status_tx.send(StatusMessage::ErrorContentBlob).await.unwrap(); + status_tx.send(StatusMessage::ErrorContentBlob).await.unwrap_or_else( + |e| panic!("Could not send status update, channel closed?: {}", e) + ); }, _=> (), } diff --git a/src/collector.rs b/src/collector.rs index 3760a0b..f45b9b8 100644 --- a/src/collector.rs +++ b/src/collector.rs @@ -1,38 +1,50 @@ use std::thread; use std::collections::HashMap; -use log::{warn, error}; +use std::mem::swap; +use std::ops::Div; +use std::time::Instant; +use log::{warn, error, info}; use futures::{SinkExt}; use futures::channel::mpsc::channel; use futures::channel::mpsc::{Sender, Receiver}; use serde_json::Value; use crate::data_structures; use crate::api_connection; +use crate::api_connection::ApiConnection; use crate::config::{Config, ContentTypesSubConfig}; -use crate::data_structures::{Caches, CliArgs}; -use crate::interface::Interface; +use crate::data_structures::{ArbitraryJson, Caches, CliArgs, ContentToRetrieve, JsonList}; +use crate::interfaces::interface::Interface; use crate::interfaces::file_interface::FileInterface; use crate::interfaces::fluentd_interface::FluentdInterface; +use crate::interfaces::graylog_interface::GraylogInterface; -/// # Rust Engine -/// A class instantiated in Python. Python will call the run_once method below, which will start -/// three background threads responsible for retrieving content. Python will then call -/// the get_result method on a loop to drain the results from the results channel until it is -/// disconnected. The three background threads are: +/// # Office Audit Log Collector +/// Will start three background threads responsible for retrieving content: /// - blob_thread: find content blobs and send results to content channel /// - content_thread: retrieve content blobs from content channel, send results to results channel /// - message_loop_thread: keep track of progress, terminate after all content is retrieved +/// Found blobs (which contain logs) are sent to the main thread, which will check filters and known +/// logs to determine whether they must be saved. If so, they are forwarded to the active +/// interfaces. Active interfaces are determined by the config file passed in by the user. 
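// ---------------------------------------------------------------------------
// Editor's illustrative sketch (not part of the diff above or below): a minimal
// example of how an additional output interface could plug into the Collector,
// alongside FileInterface, FluentdInterface and GraylogInterface. It assumes the
// crate's `Interface` trait exposes roughly `fn send_logs(&mut self, cache: Caches)`,
// as suggested by the `send_logs(cache)` calls further down in this diff; the exact
// trait shape is not shown here, so treat the signature as an assumption.
use crate::data_structures::Caches;
use crate::interfaces::interface::Interface;

/// Hypothetical interface that simply dumps every cached log to stdout.
struct StdoutInterface;

impl Interface for StdoutInterface {
    fn send_logs(&mut self, cache: Caches) {
        // Each cache bucket corresponds to one content type (Audit.General, DLP.All, ...).
        for (content_type, logs) in cache.get_all_types() {
            for log in logs {
                println!("{}: {:?}", content_type, log);
            }
        }
    }
}
// ---------------------------------------------------------------------------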
pub struct Collector { - args: CliArgs, config: Config, - runs: HashMap>, - interfaces: Vec> + interfaces: Vec>, + + result_rx: Receiver<(String, ContentToRetrieve)>, + stats_rx: Receiver<(usize, usize, usize, usize)>, + kill_tx: tokio::sync::mpsc::Sender, + known_blobs: HashMap, + saved: usize, + cache: Caches, + filters: HashMap, } impl Collector { pub fn new(args: CliArgs, config: Config, runs: HashMap>) -> Collector { + // Initialize interfaces let mut interfaces: Vec> = Vec::new(); if config.output.file.is_some() { interfaces.push(Box::new(FileInterface::new(config.clone()))); @@ -40,77 +52,138 @@ impl Collector { if config.output.fluentd.is_some() { interfaces.push(Box::new(FluentdInterface::new(config.clone()))); } + if config.output.graylog.is_some() { + interfaces.push(Box::new(GraylogInterface::new(config.clone()))); + } + + // Initialize collector threads + let api = api_connection::get_api_connection( + args.clone(), config.clone() + ); + api.subscribe_to_feeds(); + + let known_blobs = config.load_known_blobs(); + let (result_rx, stats_rx, kill_tx) = + get_available_content(api, + config.collect.content_types, + runs.clone(), + &config, + known_blobs.clone()); + + // Initialize collector + let cache_size = config.collect.cache_size.unwrap_or(500000); + let cache = Caches::new(cache_size); + let filters = + if let Some(filter_config) = &config.collect.filter { + filter_config.get_filters() + } else { + HashMap::new() + }; Collector { - args, config, - runs, interfaces, + result_rx, + stats_rx, + known_blobs, + saved: 0, + kill_tx, + filters, + cache } } - /// Non-blocking. Call once to start retrieving logs, which will arrive in the results_rx - /// receiver. Call get_results iteratively to drain the results channel. - pub fn run_once(&mut self) { - let api = api_connection::get_api_connection( - self.args.tenant_id.clone(), self.args.client_id.clone(), - self.args.secret_key.clone(), self.args.publisher_id.clone()); - let (mut result_rx, mut stats_rx) = get_available_content( - api, self.config.collect.contentTypes, self.runs.clone(), &self.config); + /// Monitor all started content retrieval threads, processing results and terminating + /// when all content has been retrieved (signalled by a final run stats message). + pub fn monitor(&mut self) { + + let start = Instant::now(); + loop { + if let Some(timeout) = self.config.collect.global_timeout { + if timeout > 0 && start.elapsed().as_secs().div(60) as usize >= timeout { + warn!("Global timeout expired, request collector stop."); + self.kill_tx.blocking_send(true).unwrap(); + } + } + // Run stats are only returned when all content has been retrieved, + // therefore this signals the end of the run. + if self.check_stats() { + break + } - let mut known_blobs = self.config.load_known_blobs(); - let mut known_logs = self.config.load_known_logs(); + // Check if a log came in. 
+ self.check_results(); + } + self.end_run(); + } - let mut skipped: usize = 0; - let mut saved: usize = 0; - let cache_size = self.config.collect.cacheSize.unwrap_or(500000); + pub fn end_run(&mut self) { + self.config.save_known_blobs(&self.known_blobs); + } - let mut cache = Caches::default(); + fn check_results(&mut self) { - loop { + if let Ok(Some((msg, content))) = self.result_rx.try_next() { + self.handle_content(msg, content); + } + } - if let Ok(Some((a, b, c, d))) = stats_rx.try_next() { - self.output(cache); - print!( -"Blobs found: {}\nBlobs successful: {}\nBlobs failed: {}\nBlobs retried: {}\nLogs saved: {}\nKnown logs skipped: {}", -a, b, c, d, saved, skipped); - break + fn handle_content(&mut self, msg: String, content: ContentToRetrieve) { + self.known_blobs.insert(content.content_id.clone(), content.expiration.clone()); + if let Ok(logs) = serde_json::from_str::(&msg) { + for log in logs { + self.handle_log(log, &content); } + } else { + warn!("Skipped log that could not be parsed: {}", content.content_id) + } + } - if let Ok(Some((msg, content))) = result_rx.try_next() { - if let Ok(logs) = - serde_json::from_str::<(Vec>)>(&msg) { - - known_blobs.insert(content.content_id, content.expiration); - for mut log in logs { - let log_id = log.get("Id").unwrap().to_string(); - log.insert("OriginFeed".to_string(), - Value::String(content.content_type.to_string())); - if known_logs.contains_key(&log_id) { - skipped += 1; - continue - } - let log_creation_time = log.get("CreationTime").unwrap() - .to_string(); - known_logs.insert(log_id, log_creation_time); - - cache.insert(log, &content.content_type); - saved += 1; - if saved % cache_size == 0 { - self.output(cache); - cache = Caches::default(); - } + fn handle_log(&mut self, mut log: ArbitraryJson, content: &ContentToRetrieve) { + + if let Some(filters) = self.filters.get(&content.content_type) { + for (k, v) in filters.iter() { + if let Some(val) = log.get(k) { + if val != v { + return } - } else { - warn!("Skipped log that could not be parsed: {}", content.content_id) } } } - self.config.save_known_blobs(&known_blobs); - self.config.save_known_logs(&known_logs); + log.insert("OriginFeed".to_string(), + Value::String(content.content_type.to_string())); + self.cache.insert(log, &content.content_type); + self.saved += 1; + if self.cache.full() { + self.output(); + } + } + fn check_stats(&mut self) -> bool { + + if let Ok(Some((found, + successful, + retried, + failed))) = self.stats_rx.try_next() { + + self.output(); + let output = self.get_output_string( + found, + successful, + failed, + retried, + self.saved, + ); + info!("{}", output); + println!("{}", output); + true + } else { + false + } } - fn output(&mut self, cache: Caches) { + fn output(&mut self) { + let mut cache = Caches::new(self.cache.size); + swap(&mut self.cache, &mut cache); if self.interfaces.len() == 1 { self.interfaces.get_mut(0).unwrap().send_logs(cache); } else { @@ -119,6 +192,19 @@ a, b, c, d, saved, skipped); } } } + + fn get_output_string(&self, found: usize, successful: usize, failed: usize, retried: usize, + saved: usize) -> String { + format!("\ + Blobs found: {}\n\ + Blobs successful: {}\n\ + Blobs failed: {}\n\ + Blobs retried: {}\n\ + Logs saved: {}\n", + found, successful, failed, retried, saved + ) + } + } @@ -126,88 +212,133 @@ a, b, c, d, saved, skipped); /// - Blob thread: Collect available content blobs /// - Content thread: Collect the blobs found by blob thread /// - Message loop: Communicates with other threads to handle retries and 
terminate when finished -fn initialize_configs( - api: api_connection::ApiConnection, content_types: ContentTypesSubConfig, +fn initialize_channels( + api: ApiConnection, content_types: ContentTypesSubConfig, runs: HashMap>, config: &Config) -> (data_structures::GetBlobConfig, data_structures::GetContentConfig, - data_structures::MessageLoopConfig, Receiver<(String, String)>, - Receiver, - Receiver<(String, data_structures::ContentToRetrieve)>, - Receiver<(usize, usize, usize, usize)>) { + data_structures::MessageLoopConfig, + Receiver<(String, String)>, + Receiver, + Receiver<(String, ContentToRetrieve)>, + Receiver<(usize, usize, usize, usize)>, + tokio::sync::mpsc::Sender) { - let urls = api_connection::create_base_urls( - content_types, api.tenant_id, api.publisher_id, runs); + let urls = api.create_base_urls(runs); // Create channels to communicate with async closures let (status_tx, status_rx): - (Sender, Receiver) = - channel(100000); - let (blobs_tx, blobs_rx): (Sender<(String, String)>, Receiver<(String, String)>) = - channel(100000); + (Sender, + Receiver) = channel(100000); + + let (blobs_tx, blobs_rx): + (Sender<(String, String)>, + Receiver<(String, String)>) = channel(100000); + let (blob_error_tx, blob_error_rx): - (Sender<(String, String)>, Receiver<(String, String)>) = channel(100000); - let (content_tx, content_rx): (Sender, - Receiver) = - channel(100000); + (Sender<(String, String)>, + Receiver<(String, String)>) = channel(100000); + + let (content_tx, content_rx): + (Sender, + Receiver) = channel(100000); + let (content_error_tx, content_error_rx): - (Sender, - Receiver) = channel(100000000); + (Sender, + Receiver) = channel(100000000); + let (result_tx, result_rx): - (Sender<(String, data_structures::ContentToRetrieve)>, - Receiver<(String, data_structures::ContentToRetrieve)>) = channel(100000000); + (Sender<(String, ContentToRetrieve)>, + Receiver<(String, ContentToRetrieve)>) = channel(100000000); + let (stats_tx, stats_rx): - (Sender<(usize, usize, usize, usize)>, Receiver<(usize, usize, usize, usize)>) = channel(100000000); + (Sender<(usize, usize, usize, usize)>, + Receiver<(usize, usize, usize, usize)>) = channel(100000000); + + let (kill_tx, kill_rx): (tokio::sync::mpsc::Sender, + tokio::sync::mpsc::Receiver) = tokio::sync::mpsc::channel(1000); - let blob_config = data_structures::GetBlobConfig { client: reqwest::Client::new(), headers: api.headers.clone(), + let blob_config = data_structures::GetBlobConfig { + client: reqwest::Client::new(), + headers: api.headers.clone(), status_tx: status_tx.clone(), blobs_tx: blobs_tx.clone(), blob_error_tx: blob_error_tx.clone(), content_tx: content_tx.clone(), - threads: config.collect.maxThreads.unwrap_or(50) + threads: config.collect.max_threads.unwrap_or(50) }; let content_config = data_structures::GetContentConfig { - client: reqwest::Client::new(), headers: api.headers.clone(), result_tx: result_tx.clone(), - content_error_tx: content_error_tx.clone(), status_tx: status_tx.clone(), - threads: config.collect.maxThreads.unwrap_or(50) + client: reqwest::Client::new(), + headers: api.headers.clone(), + result_tx: result_tx.clone(), + content_error_tx: content_error_tx.clone(), + status_tx: status_tx.clone(), + threads: config.collect.max_threads.unwrap_or(50) }; let message_loop_config = data_structures::MessageLoopConfig { - content_tx: content_tx.clone(), blobs_tx: blobs_tx.clone(), stats_tx: stats_tx.clone(), - urls, content_error_rx, status_rx, blob_error_rx, content_types, - retries: 
config.collect.retries.unwrap_or(3) + content_tx: content_tx.clone(), + blobs_tx: blobs_tx.clone(), + stats_tx: stats_tx.clone(), + urls, + content_error_rx, + status_rx, + blob_error_rx, + content_types, + retries: config.collect.retries.unwrap_or(3), + kill_rx, }; - return (blob_config, content_config, message_loop_config, blobs_rx, content_rx, result_rx, stats_rx) + return (blob_config, content_config, message_loop_config, blobs_rx, content_rx, result_rx, + stats_rx, kill_tx) } /// Get all the available log content for a list of content types and runs (start- and end times /// of content to receive). -fn get_available_content(api: api_connection::ApiConnection, content_types: ContentTypesSubConfig, - runs: HashMap>, config: &Config) - -> (Receiver<(String, data_structures::ContentToRetrieve)>, - Receiver<(usize, usize, usize, usize)>) { - - let (blob_config, content_config, message_loop_config, - blobs_rx, content_rx, result_rx, stats_rx) - = initialize_configs(api, content_types, runs, config); - spawn_blob_collector(blob_config, content_config, message_loop_config, blobs_rx, content_rx, config); - (result_rx, stats_rx) +fn get_available_content(api: ApiConnection, + content_types: ContentTypesSubConfig, + runs: HashMap>, + config: &Config, + known_blobs: HashMap) + -> (Receiver<(String, ContentToRetrieve)>, + Receiver<(usize, usize, usize, usize)>, + tokio::sync::mpsc::Sender) { + + let (blob_config, + content_config, + message_loop_config, + blobs_rx, + content_rx, + result_rx, + stats_rx, + kill_tx) = initialize_channels(api, content_types, runs, config); + + spawn_blob_collector(blob_config, + content_config, + message_loop_config, + blobs_rx, + content_rx, + known_blobs); + + (result_rx, stats_rx, kill_tx) } + /// Spawn threads running the actual collectors, and a message loop thread to keep track of /// progress and terminate once finished. fn spawn_blob_collector( - blob_config: data_structures::GetBlobConfig, content_config: data_structures::GetContentConfig, - message_loop_config: data_structures::MessageLoopConfig, blobs_rx: Receiver<(String, String)>, - content_rx: Receiver, config: &Config) { - - let known_blobs= config.load_known_blobs(); + blob_config: data_structures::GetBlobConfig, + content_config: data_structures::GetContentConfig, + message_loop_config: data_structures::MessageLoopConfig, + blobs_rx: Receiver<(String, String)>, + content_rx: Receiver, + known_blobs: HashMap) { thread::spawn( move || {api_connection::get_content_blobs(blob_config, blobs_rx, known_blobs);}); thread::spawn( move || {api_connection::get_content(content_config, content_rx);}); thread::spawn(move || {message_loop(message_loop_config)}); } + /// Receive status updates to keep track of when all content has been retrieved. Also handle /// retrying any failed content or dropping it after too many retries. 
Every time content is foudn /// awaiting_content_blobs is incremented; every time content is retrieved or could not be @@ -216,7 +347,7 @@ fn spawn_blob_collector( pub async fn message_loop(mut config: data_structures::MessageLoopConfig) { // Send base URLS for content blob retrieval then keep track of when they've all come in - let mut awaiting_content_types:usize = 0; + let mut awaiting_content_types: usize = 0; for (content_type, base_url) in config.urls.into_iter() { config.blobs_tx.clone().send((content_type, base_url)).await.unwrap(); awaiting_content_types += 1; @@ -230,6 +361,12 @@ pub async fn message_loop(mut config: data_structures::MessageLoopConfig) { // Loop ends with the run itself, signalling the program is done. loop { + if let Ok(msg) = config.kill_rx.try_recv() { + if msg { + info!("Stopping collector."); + break + } + } // Receive status message indicated found content and retrieved content. When all blobs have // been found, and all found blobs have been retrieved, we are done. if let Ok(Some(msg)) = config.status_rx.try_next() { @@ -322,9 +459,6 @@ pub async fn message_loop(mut config: data_structures::MessageLoopConfig) { config.content_tx.send(content).await.unwrap(); } } - print!("{esc}[2J{esc}[1;1H", esc = 27 as char); - println!{"Pending content types: {}, Pending content blobs: {}", - awaiting_content_types, awaiting_content_blobs} } // We send back stats after exiting the loop, signalling the end of the run. config.stats_tx.send((stats.blobs_found, stats.blobs_successful, stats.blobs_retried, diff --git a/src/config.rs b/src/config.rs index fdcb86d..487e5fd 100644 --- a/src/config.rs +++ b/src/config.rs @@ -1,10 +1,11 @@ use std::collections::HashMap; -use std::ffi::{OsStr, OsString}; +use std::ffi::OsString; use std::fs::File; -use std::io::{LineWriter, Read, Write}; +use std::io::{BufReader, LineWriter, Read, Write}; use std::path::Path; +use chrono::{DateTime, NaiveDateTime, Utc}; use serde_derive::Deserialize; -use serde_json::Value; +use crate::data_structures::ArbitraryJson; #[derive(Deserialize, Clone, Debug)] @@ -15,16 +16,26 @@ pub struct Config { } impl Config { + pub fn new(path: String) -> Self { + + let open_file = File::open(path) + .unwrap_or_else(|e| panic!("Config path could not be opened: {}", e.to_string())); + let reader = BufReader::new(open_file); + let config: Config = serde_yaml::from_reader(reader) + .unwrap_or_else(|e| panic!("Config could not be parsed: {}", e.to_string())); + config + } + pub fn get_needed_runs(&self) -> HashMap> { let mut runs: HashMap> = HashMap::new(); let end_time = chrono::Utc::now(); - let hours_to_collect = self.collect.hoursToCollect.unwrap_or(24); + let hours_to_collect = self.collect.hours_to_collect.unwrap_or(24); if hours_to_collect > 168 { panic!("Hours to collect cannot be more than 168 due to Office API limits"); } - for content_type in self.collect.contentTypes.get_content_type_strings() { + for content_type in self.collect.content_types.get_content_type_strings() { runs.insert(content_type.clone(), vec!()); let mut start_time = end_time - chrono::Duration::try_hours(hours_to_collect) .unwrap(); @@ -45,40 +56,32 @@ impl Config { } pub fn load_known_blobs(&self) -> HashMap { - - let mut known_blobs_path = Path::new(self.collect.workingDir.as_ref() - .unwrap_or(&"./".to_string())).join(Path::new("known_blobs")); - self.load_known_content(known_blobs_path.as_mut_os_string()) - } - - pub fn load_known_logs(&self) -> HashMap { - - let mut known_logs_path = Path::new(self.collect.workingDir.as_ref() - 
.unwrap_or(&"./".to_string())).join(Path::new("known_logs")); - self.load_known_content(&known_logs_path.as_mut_os_string()) + let working_dir = if let Some(i) = &self.collect.working_dir { + i.as_str() + } else { + "./" + }; + + let file_name = Path::new("known_blobs"); + let mut path = Path::new(working_dir).join(file_name); + self.load_known_content(path.as_mut_os_string()) } pub fn save_known_blobs(&mut self, known_blobs: &HashMap) { - let mut known_blobs_path = Path::new(self.collect.workingDir.as_ref() + let mut known_blobs_path = Path::new(self.collect.working_dir.as_ref() .unwrap_or(&"./".to_string())).join(Path::new("known_blobs")); self.save_known_content(known_blobs, &known_blobs_path.as_mut_os_string()) } - pub fn save_known_logs(&mut self, known_logs: &HashMap) { - - let mut known_logs_path = Path::new(self.collect.workingDir.as_ref() - .unwrap_or(&"./".to_string())).join(Path::new("known_logs")); - self.save_known_content(known_logs, &known_logs_path.as_mut_os_string()) - } fn load_known_content(&self, path: &OsString) -> HashMap { let mut known_content = HashMap::new(); - if !Path::new(path).exists() { return known_content } + // Load file let mut known_content_file = File::open(path).unwrap(); let mut known_content_string = String::new(); known_content_file.read_to_string(&mut known_content_string).unwrap(); @@ -86,8 +89,19 @@ impl Config { if line.trim().is_empty() { continue } + // Skip load expired content + let now = Utc::now(); if let Some((id, creation_time)) = line.split_once(',') { - known_content.insert(id.trim().to_string(), creation_time.trim().to_string()); + let invalidated = if let Ok(i) = + NaiveDateTime::parse_from_str(creation_time, "%Y-%m-%dT%H:%M:%S.%fZ") { + let time_utc = DateTime::::from_naive_utc_and_offset(i, Utc); + now >= time_utc + } else { + true + }; + if !invalidated { + known_content.insert(id.trim().to_string(), creation_time.trim().to_string()); + } else {println!("SKIPPA: {}", line)} } } known_content @@ -114,48 +128,52 @@ pub struct LogSubConfig { #[derive(Deserialize, Clone, Debug)] pub struct CollectSubConfig { - pub workingDir: Option, - pub cacheSize: Option, - pub contentTypes: ContentTypesSubConfig, - pub maxThreads: Option, - pub globalTimeout: Option, + #[serde(rename = "workingDir")] + pub working_dir: Option, + #[serde(rename = "cacheSize")] + pub cache_size: Option, + #[serde(rename = "contentTypes")] + pub content_types: ContentTypesSubConfig, + #[serde(rename = "maxThreads")] + pub max_threads: Option, + #[serde(rename = "globalTimeout")] + pub global_timeout: Option, pub retries: Option, - pub retryCooldown: Option, - pub autoSubscribe: Option, // Deprecated - pub resume: Option, // Deprecated - pub hoursToCollect: Option, - pub skipKnownLogs: Option, + #[serde(rename = "hoursToCollect")] + pub hours_to_collect: Option, + #[serde(rename = "skipKnownLogs")] + pub skip_known_logs: Option, pub filter: Option, } #[derive(Deserialize, Copy, Clone, Debug)] pub struct ContentTypesSubConfig { #[serde(rename = "Audit.General")] - pub general: bool, + pub general: Option, #[serde(rename = "Audit.AzureActiveDirectory")] - pub azureActiveDirectory: bool, + pub azure_active_directory: Option, #[serde(rename = "Audit.Exchange")] - pub exchange: bool, + pub exchange: Option, #[serde(rename = "Audit.SharePoint")] - pub sharePoint: bool, + pub share_point: Option, #[serde(rename = "DLP.All")] - pub dlp: bool, + pub dlp: Option, } impl ContentTypesSubConfig { pub fn get_content_type_strings(&self) -> Vec { let mut results = Vec::new(); - if 
self.general { + if self.general.unwrap_or(false) { results.push("Audit.General".to_string()) } - if self.azureActiveDirectory { + if self.azure_active_directory.unwrap_or(false) { results.push("Audit.AzureActiveDirectory".to_string()) } - if self.exchange { + if self.exchange.unwrap_or(false) { results.push("Audit.Exchange".to_string()) } - if self.sharePoint { + if self.share_point.unwrap_or(false) { results.push("Audit.SharePoint".to_string()) } - if self.dlp { + if self.dlp.unwrap_or(false) { results.push("DLP.All".to_string()) } results @@ -165,24 +183,42 @@ impl ContentTypesSubConfig { #[derive(Deserialize, Clone, Debug)] pub struct FilterSubConfig { #[serde(rename = "Audit.General")] - pub general: Option>, + pub general: Option, #[serde(rename = "Audit.AzureActiveDirectory")] - pub azureActiveDirectory: Option>, + pub azure_active_directory: Option, #[serde(rename = "Audit.Exchange")] - exchange: Option>, + pub exchange: Option, #[serde(rename = "Audit.SharePoint")] - pub sharePoint: Option>, + pub share_point: Option, #[serde(rename = "DLP.All")] - pub dlp: Option>, + pub dlp: Option, +} +impl FilterSubConfig { + pub fn get_filters(&self) -> HashMap { + + let mut results = HashMap::new(); + if let Some(filter) = self.general.as_ref() { + results.insert("Audit.General".to_string(), filter.clone()); + } + if let Some(filter) = self.azure_active_directory.as_ref() { + results.insert("Audit.AzureActiveDirectory".to_string(), filter.clone()); + } + if let Some(filter) = self.share_point.as_ref() { + results.insert("Audit.SharePoint".to_string(), filter.clone()); + } + if let Some(filter) = self.exchange.as_ref() { + results.insert("Audit.Exchange".to_string(), filter.clone()); + } + if let Some(filter) = self.dlp.as_ref() { + results.insert("DLP.All".to_string(), filter.clone()); + } + results + } } #[derive(Deserialize, Clone, Debug)] pub struct OutputSubConfig { pub file: Option, - pub azureLogAnalytics: Option, - pub azureTable: Option, - pub azureBlob: Option, - pub sql: Option, pub graylog: Option, pub fluentd: Option, } @@ -190,43 +226,21 @@ pub struct OutputSubConfig { #[derive(Deserialize, Clone, Debug)] pub struct FileOutputSubConfig { pub path: String, - pub separateByContentType: Option, - pub separator: Option, -} - -#[derive(Deserialize, Clone, Debug)] -pub struct OmsOutputSubConfig { - pub workspaceId: String, -} - -#[derive(Deserialize, Clone, Debug)] -pub struct AzTableOutputSubConfig { - pub tableName: String, -} - -#[derive(Deserialize, Clone, Debug)] -pub struct AzBlobOutputSubConfig { - pub containerName: String, - pub blobName: String, - pub tempPath: Option, - pub separateByContentType: Option, + #[serde(rename = "separateByContentType")] + pub separate_by_content_type: Option, pub separator: Option, } -#[derive(Deserialize, Clone, Debug)] -pub struct sqlOutputSubConfig { - pub chunkSize: Option, -} - #[derive(Deserialize, Clone, Debug)] pub struct GraylogOutputSubConfig { pub address: String, - pub port: usize, + pub port: u16, } #[derive(Deserialize, Clone, Debug)] pub struct FluentdOutputSubConfig { - pub tenantName: String, + #[serde(rename = "tenantName")] + pub tenant_name: String, pub address: String, - pub port: usize, + pub port: u16, } diff --git a/src/data_structures.rs b/src/data_structures.rs index 7930fd0..e21e2db 100644 --- a/src/data_structures.rs +++ b/src/data_structures.rs @@ -1,26 +1,43 @@ use futures::channel::mpsc::{Sender, Receiver}; use std::collections::HashMap; use reqwest::header::HeaderMap; -use serde_derive::{Deserialize}; +use 
serde_derive::Deserialize; use clap::Parser; use log::warn; use serde_json::Value; use crate::config::ContentTypesSubConfig; /// List of JSON responses (used to represent content blobs) -pub type JsonList = Vec>; +pub type ArbitraryJson = HashMap; +pub type JsonList = Vec; #[derive(Default, Clone)] pub struct Caches { - general: Vec>, - aad: Vec>, - exchange: Vec>, - sharepoint: Vec>, - dlp: Vec>, + pub general: JsonList, + pub aad: JsonList, + pub exchange: JsonList, + pub sharepoint: JsonList, + pub dlp: JsonList, + pub size: usize, } impl Caches { - pub fn insert(&mut self, log: HashMap, content_type: &String) { + + pub fn full(&self) -> bool { + let size = self.general.len() + + self.aad.len() + + self.exchange.len() + + self.sharepoint.len() + + self.dlp.len(); + size >= self.size + } + + pub fn new(size: usize) -> Self { + let mut cache = Caches::default(); + cache.size = size; + cache + } + pub fn insert(&mut self, log: ArbitraryJson, content_type: &String) { match content_type.as_str() { "Audit.General" => self.general.push(log), "Audit.AzureActiveDirectory" => self.aad.push(log), @@ -31,7 +48,7 @@ impl Caches { } } - pub fn get_all_types(&self) -> [(String, &Vec>); 5] { + pub fn get_all_types(&self) -> [(String, &JsonList); 5] { [ ("Audit.General".to_string(), &self.general), ("Audit.AzureActiveDirectory".to_string(), &self.aad), @@ -41,13 +58,13 @@ impl Caches { ] } - pub fn get_all(&self) -> [&Vec>; 5] { + pub fn get_all(&mut self) -> [&mut JsonList; 5] { [ - &self.general, - &self.aad, - &self.exchange, - &self.sharepoint, - &self.dlp + &mut self.general, + &mut self.aad, + &mut self.exchange, + &mut self.sharepoint, + &mut self.dlp ] } } @@ -110,6 +127,7 @@ pub struct GetContentConfig { /// finished. pub struct MessageLoopConfig { pub status_rx: Receiver, + pub kill_rx: tokio::sync::mpsc::Receiver, pub stats_tx: Sender<(usize, usize, usize, usize)>, pub blobs_tx: Sender<(String, String)>, pub blob_error_rx: Receiver<(String, String)>, @@ -140,7 +158,7 @@ impl RunStatistics { } -#[derive(Parser, Debug)] +#[derive(Parser, Debug, Clone)] #[command(version, about, long_about = None)] /// Collect audit logs from Office Management APIs. /// Complete all preparation steps in README.MD diff --git a/src/interfaces/file_interface.rs b/src/interfaces/file_interface.rs index b346299..724e5eb 100644 --- a/src/interfaces/file_interface.rs +++ b/src/interfaces/file_interface.rs @@ -1,71 +1,79 @@ use std::collections::HashMap; use std::path::Path; use chrono::Utc; -use csv::{StringRecord, Writer}; -use log::warn; -use serde_json::Value; +use csv::{Writer}; use crate::config::Config; -use crate::data_structures::Caches; -use crate::interface::Interface; +use crate::data_structures::{ArbitraryJson, Caches}; +use crate::interfaces::interface::Interface; +/// Interface that send found logs to one CSV file, or one CSV file per content type. 
pub struct FileInterface { config: Config, paths: HashMap, postfix: String, } + impl FileInterface { pub fn new(config: Config) -> Self { - let mut paths: HashMap = HashMap::new(); let postfix = Utc::now().format("%Y%m%d%H%M%S").to_string(); let mut interface = FileInterface { config, - paths, + paths: HashMap::new(), postfix: postfix.clone() }; if interface.separate_by_content_type() { - let stem = Path::new( - &interface.config.output.file.as_ref().unwrap().path.clone()) - .file_stem().unwrap().to_str().unwrap().to_string(); - for content_type in interface.config.collect.contentTypes.get_content_type_strings() { - let file = format!("{}_{}_{}.csv", postfix.clone(), stem.clone(), - content_type.replace('.', "")); - interface.paths.insert(content_type, file); - } + interface.create_content_type_paths(); } interface } -} -impl Interface for FileInterface { - fn send_logs(&mut self, cache: Caches) { - if !self.separate_by_content_type() { - self.send_logs_unified(cache); - } else { - self.send_logs_separated(cache); + /// Based on the desired CSV path, create a path for each content type. Used + /// when SeparateByContentType is true. + fn create_content_type_paths(&mut self) { + let path = Path::new(&self.config.output.file + .as_ref() + .unwrap() + .path); + let dir = path.parent(); + let stem = path + .file_stem().unwrap() + .to_str().unwrap() + .to_string(); + + let content_strings = self.config.collect.content_types.get_content_type_strings(); + for content_type in content_strings { + let mut file = format!("{}_{}_{}.csv", + self.postfix.clone(), + stem.clone(), + content_type.replace('.', "")); + if let Some(parent) = dir { + file = format!("{}/{}", parent.to_str().unwrap(), file); + } + self.paths.insert(content_type, file); } } -} - -impl FileInterface { + /// Convenience method to get config property. fn separate_by_content_type(&self) -> bool { - self.config.output.file.as_ref().unwrap().separateByContentType.unwrap_or(false) + self.config.output.file.as_ref().unwrap().separate_by_content_type.unwrap_or(false) } - fn send_logs_unified(&self, cache: Caches) { + /// Save the logs of all content types in a single CSV file. + fn send_logs_unified(&self, mut cache: Caches) { // Get columns from all content types - let all_logs = cache.get_all(); + let mut all_logs = cache.get_all(); let mut columns: Vec = Vec::new(); - for content_type in all_logs { + for content_type in all_logs.iter_mut() { columns.append(&mut get_all_columns(content_type)); } + let mut wrt = Writer::from_path(&self.config.output.file.as_ref().unwrap().path).unwrap(); wrt.write_record(&columns).unwrap(); - for logs in all_logs { - for log in logs { + for logs in all_logs.iter_mut() { + for log in logs.iter_mut() { let new_log = fill_log(log, &columns); wrt.write_record(new_log).unwrap(); } @@ -73,6 +81,7 @@ impl FileInterface { wrt.flush().unwrap(); } + /// Save the logs of each content type to a separate CSV file. 
fn send_logs_separated(&self, cache: Caches) { for (content_type, logs) in cache.get_all_types() { if logs.is_empty() { @@ -80,6 +89,6 @@ impl FileInterface { } let columns = get_all_columns(logs); let path = self.paths.get(&content_type).unwrap(); let mut wrt = Writer::from_path(path).unwrap(); wrt.write_record(&columns).unwrap(); @@ -92,8 +102,19 @@ impl FileInterface { } } +impl Interface for FileInterface { + fn send_logs(&mut self, logs: Caches) { + if !self.separate_by_content_type() { + self.send_logs_unified(logs); + } else { + self.send_logs_separated(logs); + } + } +} + -fn get_all_columns(logs: &[HashMap<String, Value>]) -> Vec<String> { +/// Get all column names in a heterogeneous collection of logs. +fn get_all_columns(logs: &[ArbitraryJson]) -> Vec<String> { let mut columns: Vec<String> = Vec::new(); for log in logs.iter() { @@ -106,7 +127,9 @@ fn get_all_columns(logs: &[HashMap<String, Value>]) -> Vec<String> { columns } -fn fill_log(log: &HashMap<String, Value>, columns: &Vec<String>) -> Vec<String> { +/// Because logs are heterogeneous, not all logs have all columns. Fill missing columns of +/// a log with an empty string. +fn fill_log(log: &ArbitraryJson, columns: &Vec<String>) -> Vec<String> { let mut new_log = Vec::new(); for col in columns { if !log.contains_key(col) { diff --git a/src/interfaces/fluentd_interface.rs b/src/interfaces/fluentd_interface.rs index b87f01b..5ea1623 100644 --- a/src/interfaces/fluentd_interface.rs +++ b/src/interfaces/fluentd_interface.rs @@ -1,15 +1,10 @@ -use std::collections::HashMap; -use std::path::Path; use std::time::SystemTime; -use chrono::{DateTime, Duration, FixedOffset, NaiveDateTime, NaiveTime, Utc}; +use chrono::{DateTime, NaiveDateTime, Utc}; use core::time; -use csv::Writer; -use futures::future::Lazy; use poston::{Client, Settings, WorkerPool}; -use serde_json::Value; use crate::config::Config; -use crate::data_structures::Caches; -use crate::interface::Interface; +use crate::data_structures::{ArbitraryJson, Caches}; +use crate::interfaces::interface::Interface; pub struct FluentdInterface { config: Config, @@ -38,22 +33,30 @@ impl FluentdInterface { pool, } } + + fn get_tenant_name(&self) -> String { + self.config.output.fluentd.as_ref().unwrap().tenant_name.clone() + } } impl Interface for FluentdInterface { - fn send_logs(&mut self, cache: Caches) { + fn send_logs(&mut self, mut logs: Caches) { - let all_logs = cache.get_all(); + let all_logs = logs.get_all(); for logs in all_logs { for log in logs { - let time_string = log.get("CreationTime").unwrap().as_str().unwrap(); - let time = NaiveDateTime::parse_from_str( - time_string, "%Y-%m-%dT%H:%M:%S").unwrap(); - let time_utc = DateTime::<Utc>::from_naive_utc_and_offset(time, Utc); - let timestamp = SystemTime::from(time_utc); - self.pool.send(self.config.output.fluentd.as_ref().unwrap().tenantName.clone(), - log, timestamp).unwrap(); + let timestamp = get_timestamp(log); + self.pool.send(self.get_tenant_name(), log, timestamp).unwrap(); } } } +} + +fn get_timestamp(log: &ArbitraryJson) -> SystemTime { + + let time_string = log.get("CreationTime").unwrap().as_str().unwrap(); + let time = NaiveDateTime::parse_from_str( + time_string, "%Y-%m-%dT%H:%M:%S").unwrap(); + let time_utc = DateTime::<Utc>::from_naive_utc_and_offset(time, Utc); + SystemTime::from(time_utc) } \ No newline at end of file diff --git a/src/interfaces/graylog_interface.rs b/src/interfaces/graylog_interface.rs new file mode 100644 index 0000000..62c6cd5 --- /dev/null +++ b/src/interfaces/graylog_interface.rs @@ -0,0 +1,110 @@ +use std::io::{ErrorKind, Write}; +use std::net::{TcpStream, 
ToSocketAddrs}; +use std::time::Duration; +use chrono::{DateTime, NaiveDateTime, Utc}; +use log::warn; +use serde_json::Value; +use crate::config::Config; +use crate::data_structures::{ArbitraryJson, Caches}; +use crate::interfaces::interface::Interface; + +pub struct GraylogInterface { + address: String, + port: u16, +} + +impl GraylogInterface { + + pub fn new(config: Config) -> Self { + + let address = config.output.graylog.as_ref().unwrap().address.clone(); + let port = config.output.graylog.as_ref().unwrap().port; + let interface = GraylogInterface { + address, + port + }; + + // Test the socket; if we cannot connect there is no point in running + let _ = interface.get_socket(); + interface + } +} + +impl GraylogInterface { + fn get_socket(&self) -> TcpStream { + + let ip_addr = (self.address.clone(), self.port) + .to_socket_addrs() + .expect("Unable to resolve the IP address") + .next() + .expect("DNS resolution returned no IP addresses"); + TcpStream::connect_timeout(&ip_addr, Duration::from_secs(10)).unwrap_or_else( + |e| panic!("Could not connect to Graylog interface on: {}:{} with: {}", + self.address, self.port, e) + ) + } +} + +impl Interface for GraylogInterface { + + fn send_logs(&mut self, mut logs: Caches) { + + let mut all_logs = logs.get_all(); + for logs in all_logs.iter_mut() { + for log in logs.iter_mut() { + + match add_timestamp_field(log) { + Ok(()) => (), + Err(e) => { + warn!("Could not parse timestamp for log in Graylog interface: {}", e); + continue + } + } + + match serde_json::to_string(log) { + Ok(json) => { + let mut socket = self.get_socket(); + socket.write_all(json.as_bytes()).unwrap_or_else( + |e| warn!("Could not send log to Graylog interface: {}", e)); + socket.flush().unwrap_or_else( + |e| warn!("Could not send log to Graylog interface: {}", e)); + } + Err(e) => warn!("Could not serialize a log in Graylog interface: {}.", e) + } + } + } + } +} + + +pub fn add_timestamp_field(log: &mut ArbitraryJson) -> Result<(), std::io::Error> { + + let time_value = if let Some(i) = log.get("CreationTime") { + i + } else { + return Err(std::io::Error::new( + ErrorKind::NotFound, "Expected CreationTime field".to_string())) + }; + + let time_string = if let Some(i) = time_value.as_str() { + i + } else { + return Err(std::io::Error::new( + ErrorKind::NotFound, "Could not convert timestamp field to string".to_string())) + + }; + + let time = if let Ok(i) = + NaiveDateTime::parse_from_str(time_string, "%Y-%m-%dT%H:%M:%S") { + i + } else { + return Err(std::io::Error::new( + ErrorKind::NotFound, "Could not parse time of log".to_string())) + }; + + let time_utc = DateTime::<Utc>::from_naive_utc_and_offset(time, Utc); + let mut time_stamp = time_utc.format("%Y-%m-%d %H:%M:%S.%f").to_string(); + time_stamp = time_stamp[..time_stamp.len() - 6].to_string(); + log.insert("timestamp".to_string(), Value::String(time_stamp)); + Ok(()) +} diff --git a/src/interface.rs b/src/interfaces/interface.rs similarity index 100% rename from src/interface.rs rename to src/interfaces/interface.rs diff --git a/src/interfaces/mod.rs b/src/interfaces/mod.rs index 59d7fcb..59cd50c 100644 --- a/src/interfaces/mod.rs +++ b/src/interfaces/mod.rs @@ -1,2 +1,4 @@ -pub mod file_interface; -pub mod fluentd_interface; \ No newline at end of file +pub(crate) mod file_interface; +pub(crate) mod fluentd_interface; +pub(crate) mod graylog_interface; +pub mod interface; diff --git a/src/main.rs b/src/main.rs index 2c339e4..88f4d0b 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,32 +1,19 @@ -use std::collections::HashMap; 
-use std::fs::{File, read}; -use std::io::{BufReader, Read}; -use chrono::DateTime; use clap::Parser; use crate::collector::Collector; -use crate::config::{Config, ContentTypesSubConfig}; +use crate::config::Config; mod collector; mod api_connection; mod data_structures; mod config; -mod interface; mod interfaces; fn main() { let args = data_structures::CliArgs::parse(); - - // Read config - let open_file = File::open(args.config.clone()) - .unwrap_or_else(|e| panic!("Config path could not be opened: {}", e.to_string())); - let reader = BufReader::new(open_file); - let config: Config = serde_yaml::from_reader(reader) - .unwrap_or_else(|e| panic!("Config could not be parsed: {}", e.to_string())); + let config = Config::new(args.config.clone()); let runs = config.get_needed_runs(); - let mut collector = Collector::new(args, config, runs); - - collector.run_once(); + collector.monitor(); } diff --git a/todo b/todo deleted file mode 100644 index 7009074..0000000 --- a/todo +++ /dev/null @@ -1,12 +0,0 @@ - -make build containers - -filter -global timeout -subscribe - -graylog interface -sql interface - -get rid of unwraps -tidy up \ No newline at end of file
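Given the renamed config structs in src/config.rs above (the YAML keys keep their camelCase spelling through the serde renames), a minimal config covering the three remaining outputs might look roughly like the sketch below. This is an illustration only, not part of the diff: the path, addresses, ports and numeric values are placeholder assumptions, any output section can be omitted since every output is optional, and hoursToCollect may not exceed 168.

collect:
  contentTypes:
    Audit.General: True
    Audit.AzureActiveDirectory: True
    Audit.Exchange: True
    Audit.SharePoint: True
    DLP.All: True
  workingDir: ./            # directory where the known_blobs cache file is kept
  cacheSize: 500000         # placeholder value
  hoursToCollect: 24        # at most 168
  skipKnownLogs: True
output:
  file:
    path: output.csv                 # placeholder path
    separateByContentType: True
    separator: ';'
  graylog:
    address: graylog.example.com     # placeholder address
    port: 12201                      # placeholder port
  fluentd:
    tenantName: example-tenant       # placeholder tenant name
    address: fluentd.example.com     # placeholder address
    port: 24224                      # placeholder port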