Skip to content

Commit

Permalink
Change file output to CSV
Browse files Browse the repository at this point in the history
Change file output to CSV with a config parameter for separator
  • Loading branch information
ddbnl committed Apr 14, 2022
1 parent 743d3cf commit 46456ac
Show file tree
Hide file tree
Showing 6 changed files with 58 additions and 18 deletions.
4 changes: 3 additions & 1 deletion ConfigExamples/fileOutput.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,6 @@ collect:
output:
file:
enabled: True
path: 'output.txt'
path: 'output.txt'
separateByContentType: True
separator: ';'
6 changes: 4 additions & 2 deletions ConfigExamples/fullConfig.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,11 @@ filter: # Only logs that match ALL filters for a content type are collected. Le
Audit.SharePoint:
DLP.All:
output:
file:
file: # CSV output
enabled: False
path: 'output.txt'
separateByContentType: True # Creates a separate CSV file for each content type, appends content name to path
path: 'output.csv'
separator: ';'
azureLogAnalytics:
enabled: False
workspaceId:
Expand Down
29 changes: 15 additions & 14 deletions Source/AuditLogCollector.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from Interfaces import AzureOMSInterface, GraylogInterface, PRTGInterface
from Interfaces import AzureOMSInterface, GraylogInterface, PRTGInterface, FileInterface
import AuditLogSubscriber
import ApiConnection
import os
Expand Down Expand Up @@ -51,7 +51,7 @@ def __init__(self, content_types=None, resume=True, fallback_time=None, skip_kno
self.filters = {}

self.file_output = file_output
self.output_path = output_path
self.file_interface = FileInterface.FileInterface(**kwargs)
self.azure_oms_output = azure_oms_output
self.azure_oms_interface = AzureOMSInterface.AzureOMSInterface(**kwargs)
self.graylog_output = graylog_output
Expand Down Expand Up @@ -151,7 +151,11 @@ def _load_file_output_config(self, config):
if 'enabled' in config['output']['file']:
self.file_output = config['output']['file']['enabled']
if 'path' in config['output']['file']:
self.output_path = config['output']['file']['path']
self.file_interface.output_path = config['output']['file']['path']
if 'separateByContentType' in config['output']['file']:
self.file_interface.separate_by_content_type = config['output']['file']['separateByContentType']
if 'separator' in config['output']['file']:
self.file_interface.separator = config['output']['file']['separator']

def _load_azure_log_analytics_output_config(self, config):
"""
Expand Down Expand Up @@ -242,6 +246,8 @@ def _finish_run(self):
if self.resume and self._last_run_times:
with open('last_run_times', 'w') as ofile:
json.dump(fp=ofile, obj=self._last_run_times)
if self.file_output:
self.file_interface.output()
if self.prtg_output:
self.prtg_interface.output()
self._log_statistics()
Expand Down Expand Up @@ -367,6 +373,8 @@ def _get_available_content(self, content_type, start_time):

def _start_interfaces(self):

if self.file_output:
self.file_interface.start()
if self.azure_oms_output:
self.azure_oms_interface.start()
if self.prtg_output:
Expand All @@ -376,6 +384,8 @@ def _start_interfaces(self):

def _stop_interfaces(self):

if self.file_output:
self.file_interface.stop()
if self.azure_oms_output:
self.azure_oms_interface.stop()
if self.prtg_output:
Expand Down Expand Up @@ -470,7 +480,7 @@ def _output_results(self, results, content_type):
:param results: list of JSON
"""
if self.file_output:
self._output_results_to_file(*results)
self.file_interface.send_messages(*results, content_type=content_type)
if self.prtg_output:
self.prtg_interface.send_messages(*results, content_type=content_type)
if self.graylog_output:
Expand All @@ -490,15 +500,6 @@ def _check_filters(self, log, content_type):
return False
return True

def _output_results_to_file(self, *results):
"""
Dump received JSON messages to a file.
:param results: retrieved JSON (dict)
"""
for result in results:
with open(self.output_path, 'a') as ofile:
ofile.write("{}\n".format(json.dumps(obj=result)))

def _add_known_log(self):
"""
Add a content ID to the known content file to avoid saving messages more than once.
Expand Down Expand Up @@ -677,7 +678,7 @@ def known_content(self):
tenant_id=argsdict['tenant_id'], secret_key=argsdict['secret_key'], client_key=argsdict['client_key'],
content_types=content_types, publisher_id=argsdict['publisher_id'], resume=argsdict['resume'],
fallback_time=fallback_time, skip_known_logs=argsdict['skip_known_logs'], log_path=argsdict['log_path'],
file_output=argsdict['file'], output_path=argsdict['output_path'], debug=argsdict['debug_logging'],
file_output=argsdict['file'], path=argsdict['output_path'], debug=argsdict['debug_logging'],
prtg_output=argsdict['prtg'],
azure_oms_output=argsdict['azure'], workspace_id=argsdict['azure_workspace'],
shared_key=argsdict['azure_key'],
Expand Down
2 changes: 1 addition & 1 deletion Source/Interfaces/AzureOMSInterface.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,6 @@ def _post_data(self, body, log_type, time_generated):

response.close()
if 200 <= status_code <= 299:
logging.info('Accepted payload:' + body)
logging.debug('Accepted payload:' + body)
else:
raise RuntimeError("Unable to send to OMS with {}: {} ".format(status_code, json_output))
35 changes: 35 additions & 0 deletions Source/Interfaces/FileInterface.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
import logging
import os
from . import _Interface
import collections
import pandas


class FileInterface(_Interface.Interface):

    def __init__(self, path='output', separate_by_content_type=True, separator=';', **kwargs):
        """
        Interface to write retrieved logs to CSV file(s).
        :param path: base output path; when separate_by_content_type is True the content
            type name is appended to it to form one file per content type (str)
        :param separate_by_content_type: create a separate CSV file for each content type (bool)
        :param separator: CSV field separator (str)
        """
        super().__init__(**kwargs)
        self.path = path
        # Maps content type -> resolved output file path (filled lazily in _send_message).
        self.paths = {}
        self.separate_by_content_type = separate_by_content_type
        self.separator = separator
        # Buffer a list of single-row DataFrames per content type; they are concatenated
        # once in output() instead of on every message, avoiding quadratic pandas.concat.
        self.results = collections.defaultdict(list)

    def _send_message(self, msg, content_type, **kwargs):
        """
        Buffer a single retrieved log for later CSV output.
        :param msg: retrieved log (dict)
        :param content_type: content type the log belongs to (str)
        """
        if content_type not in self.paths:
            # '.' is stripped from the content type (e.g. 'Audit.General' -> 'AuditGeneral')
            # to keep the generated file name tidy.
            self.paths[content_type] = "{}_{}.csv".format(self.path, content_type.replace('.', '')) \
                if self.separate_by_content_type else self.path
        self.results[content_type].append(pandas.json_normalize(msg))

    def output(self):
        """
        Append all buffered logs to their CSV file(s). A header row is written only when
        the target file does not exist yet, so repeated runs append data rows only.
        """
        for content_type, frames in self.results.items():
            # A content type only appears in results once at least one message was
            # buffered, so frames is never empty here.
            result = pandas.concat(frames)
            result.to_csv(self.paths[content_type], index=False, sep=self.separator, mode='a',
                          header=not os.path.exists(self.paths[content_type]))
        # Clear the buffer so a subsequent output() call does not re-append the same logs.
        self.results.clear()


Binary file modified Source/requirements.txt
Binary file not shown.

0 comments on commit 46456ac

Please sign in to comment.