Skip to content

Commit

Permalink
Change file output to CSV
Browse files Browse the repository at this point in the history
Change file output to CSV with a config parameter for separator
  • Loading branch information
ddbnl committed Apr 14, 2022
1 parent 743d3cf commit 46456ac
Show file tree
Hide file tree
Showing 6 changed files with 58 additions and 18 deletions.
4 changes: 3 additions & 1 deletion ConfigExamples/fileOutput.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,6 @@ collect:
output:
file:
enabled: True
path: 'output.txt'
path: 'output.txt'
separateByContentType: True
separator: ';'
6 changes: 4 additions & 2 deletions ConfigExamples/fullConfig.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,11 @@ filter: # Only logs that match ALL filters for a content type are collected. Le
Audit.SharePoint:
DLP.All:
output:
file:
file: # CSV output
enabled: False
path: 'output.txt'
separateByContentType: True # Creates a separate CSV file for each content type, appends content name to path
path: 'output.csv'
separator: ';'
azureLogAnalytics:
enabled: False
workspaceId:
Expand Down
29 changes: 15 additions & 14 deletions Source/AuditLogCollector.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from Interfaces import AzureOMSInterface, GraylogInterface, PRTGInterface
from Interfaces import AzureOMSInterface, GraylogInterface, PRTGInterface, FileInterface
import AuditLogSubscriber
import ApiConnection
import os
Expand Down Expand Up @@ -51,7 +51,7 @@ def __init__(self, content_types=None, resume=True, fallback_time=None, skip_kno
self.filters = {}

self.file_output = file_output
self.output_path = output_path
self.file_interface = FileInterface.FileInterface(**kwargs)
self.azure_oms_output = azure_oms_output
self.azure_oms_interface = AzureOMSInterface.AzureOMSInterface(**kwargs)
self.graylog_output = graylog_output
Expand Down Expand Up @@ -151,7 +151,11 @@ def _load_file_output_config(self, config):
if 'enabled' in config['output']['file']:
self.file_output = config['output']['file']['enabled']
if 'path' in config['output']['file']:
self.output_path = config['output']['file']['path']
self.file_interface.output_path = config['output']['file']['path']
if 'separateByContentType' in config['output']['file']:
self.file_interface.separate_by_content_type = config['output']['file']['separateByContentType']
if 'separator' in config['output']['file']:
self.file_interface.separator = config['output']['file']['separator']

def _load_azure_log_analytics_output_config(self, config):
"""
Expand Down Expand Up @@ -242,6 +246,8 @@ def _finish_run(self):
if self.resume and self._last_run_times:
with open('last_run_times', 'w') as ofile:
json.dump(fp=ofile, obj=self._last_run_times)
if self.file_output:
self.file_interface.output()
if self.prtg_output:
self.prtg_interface.output()
self._log_statistics()
Expand Down Expand Up @@ -367,6 +373,8 @@ def _get_available_content(self, content_type, start_time):

def _start_interfaces(self):

if self.file_output:
self.file_interface.start()
if self.azure_oms_output:
self.azure_oms_interface.start()
if self.prtg_output:
Expand All @@ -376,6 +384,8 @@ def _start_interfaces(self):

def _stop_interfaces(self):

if self.file_output:
self.file_interface.stop()
if self.azure_oms_output:
self.azure_oms_interface.stop()
if self.prtg_output:
Expand Down Expand Up @@ -470,7 +480,7 @@ def _output_results(self, results, content_type):
:param results: list of JSON
"""
if self.file_output:
self._output_results_to_file(*results)
self.file_interface.send_messages(*results, content_type=content_type)
if self.prtg_output:
self.prtg_interface.send_messages(*results, content_type=content_type)
if self.graylog_output:
Expand All @@ -490,15 +500,6 @@ def _check_filters(self, log, content_type):
return False
return True

def _output_results_to_file(self, *results):
"""
Dump received JSON messages to a file.
:param results: retrieved JSON (dict)
"""
for result in results:
with open(self.output_path, 'a') as ofile:
ofile.write("{}\n".format(json.dumps(obj=result)))

def _add_known_log(self):
"""
Add a content ID to the known content file to avoid saving messages more than once.
Expand Down Expand Up @@ -677,7 +678,7 @@ def known_content(self):
tenant_id=argsdict['tenant_id'], secret_key=argsdict['secret_key'], client_key=argsdict['client_key'],
content_types=content_types, publisher_id=argsdict['publisher_id'], resume=argsdict['resume'],
fallback_time=fallback_time, skip_known_logs=argsdict['skip_known_logs'], log_path=argsdict['log_path'],
file_output=argsdict['file'], output_path=argsdict['output_path'], debug=argsdict['debug_logging'],
file_output=argsdict['file'], path=argsdict['output_path'], debug=argsdict['debug_logging'],
prtg_output=argsdict['prtg'],
azure_oms_output=argsdict['azure'], workspace_id=argsdict['azure_workspace'],
shared_key=argsdict['azure_key'],
Expand Down
2 changes: 1 addition & 1 deletion Source/Interfaces/AzureOMSInterface.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,6 @@ def _post_data(self, body, log_type, time_generated):

response.close()
if 200 <= status_code <= 299:
logging.info('Accepted payload:' + body)
logging.debug('Accepted payload:' + body)
else:
raise RuntimeError("Unable to send to OMS with {}: {} ".format(status_code, json_output))
35 changes: 35 additions & 0 deletions Source/Interfaces/FileInterface.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
import logging
import os
from . import _Interface
import collections
import pandas


class FileInterface(_Interface.Interface):

    def __init__(self, path='output', separate_by_content_type=True, separator=';', **kwargs):
        """
        Interface to write retrieved logs to CSV file(s).
        :param path: base output path; when separate_by_content_type is True the content
            type name is appended to it to form one file per content type (str)
        :param separate_by_content_type: create a separate CSV file for each content type (bool)
        :param separator: CSV field separator (str)
        """
        super().__init__(**kwargs)
        self.path = path
        # Maps content type -> resolved output file path (filled lazily in _send_message).
        self.paths = {}
        self.separate_by_content_type = separate_by_content_type
        self.separator = separator
        # Buffer a list of single-row DataFrames per content type; they are concatenated
        # once in output() instead of on every message, avoiding quadratic pandas.concat.
        self.results = collections.defaultdict(list)

    def _send_message(self, msg, content_type, **kwargs):
        """
        Buffer a single retrieved log for later CSV output.
        :param msg: retrieved log (dict)
        :param content_type: content type the log belongs to (str)
        """
        if content_type not in self.paths:
            # '.' is stripped from the content type (e.g. 'Audit.General' -> 'AuditGeneral')
            # to keep the generated file name tidy.
            self.paths[content_type] = "{}_{}.csv".format(self.path, content_type.replace('.', '')) \
                if self.separate_by_content_type else self.path
        self.results[content_type].append(pandas.json_normalize(msg))

    def output(self):
        """
        Append all buffered logs to their CSV file(s). A header row is written only when
        the target file does not exist yet, so repeated runs append data rows only.
        """
        for content_type, frames in self.results.items():
            # A content type only appears in results once at least one message was
            # buffered, so frames is never empty here.
            result = pandas.concat(frames)
            result.to_csv(self.paths[content_type], index=False, sep=self.separator, mode='a',
                          header=not os.path.exists(self.paths[content_type]))
        # Clear the buffer so a subsequent output() call does not re-append the same logs.
        self.results.clear()


Binary file modified Source/requirements.txt
Binary file not shown.

0 comments on commit 46456ac

Please sign in to comment.