Skip to content

Commit

Permalink
Code review: 284330043: Fix cups ipp parser, and catch unicode errors…
Browse files Browse the repository at this point in the history
… during serialization.
  • Loading branch information
Onager committed Jan 22, 2016
1 parent c649668 commit 65b4421
Show file tree
Hide file tree
Showing 6 changed files with 139 additions and 40 deletions.
2 changes: 1 addition & 1 deletion config/dpkg/changelog
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@ python-plaso (1.3.1-1) unstable; urgency=low

* Auto-generated

-- Log2Timeline <[email protected]> Don, 21 Jan 2016 08:29:48 +0100
-- Log2Timeline <[email protected]> Fri, 22 Jan 2016 16:34:11 +0100
2 changes: 1 addition & 1 deletion plaso/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
__version__ = '1.3.1'

VERSION_DEV = True
VERSION_DATE = '20160121'
VERSION_DATE = '20160122'


def GetVersion():
Expand Down
40 changes: 39 additions & 1 deletion plaso/lib/timelib.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,12 @@

import calendar
import datetime
import dateutil.parser
import logging
import time
import pytz

import dateutil.parser

from plaso.lib import errors


Expand Down Expand Up @@ -606,6 +607,43 @@ def FromPythonDatetime(cls, datetime_object):
posix_time = int(calendar.timegm(datetime_object.utctimetuple()))
return cls.FromPosixTime(posix_time) + datetime_object.microsecond

@classmethod
def FromRFC2579Datetime(
    cls, year, month, day, hour, minutes, seconds, deciseconds,
    direction_from_utc, hours_from_utc, minutes_from_utc):
  """Converts values from an RFC2579 time to a timestamp.

  See https://tools.ietf.org/html/rfc2579.

  The deciseconds are converted to microseconds and the UTC offset parts are
  combined into a fixed-offset timezone; the actual conversion is delegated
  to FromTimeParts.

  Args:
    year: An integer representing the year.
    month: An integer between 1 and 12.
    day: An integer representing the number of the day in the month.
    hour: An integer representing the hour, 0 <= hour < 24.
    minutes: An integer, 0 <= minutes < 60.
    seconds: An integer, 0 <= seconds < 60.
    deciseconds: An integer, 0 <= deciseconds < 10.
    direction_from_utc: An ascii character, either '+' or '-'. Only '-'
                        negates the offset; any other value is handled
                        as '+'.
    hours_from_utc: An integer representing the number of hours the time is
                    offset from UTC.
    minutes_from_utc: An integer representing the number of minutes the time
                      is offset from UTC, in addition to hours_from_utc.

  Returns:
    The value returned by FromTimeParts, documented as the timestamp which
    is an integer containing the number of micro seconds since
    January 1, 1970, 00:00:00 UTC.

  Raises:
    TimestampError: if the timestamp cannot be created from the time parts.
                    Presumably raised by FromTimeParts, nothing in this
                    method raises it directly.
  """
  # NOTE(review): the previous docstring also promised "0 on error", which
  # conflicts with the Raises section; confirm against FromTimeParts.

  # A decisecond is a tenth of a second, hence 100000 microseconds.
  microseconds = deciseconds * 100000

  # Collapse the hour and minute parts of the offset into signed minutes,
  # which is the unit pytz.FixedOffset expects.
  utc_offset_minutes = (hours_from_utc * 60) + minutes_from_utc
  if direction_from_utc == u'-':
    utc_offset_minutes = -utc_offset_minutes

  timezone = pytz.FixedOffset(utc_offset_minutes)
  return cls.FromTimeParts(
      year, month, day, hour, minutes, seconds, microseconds, timezone)

@classmethod
def FromTimeParts(
cls, year, month, day, hour, minutes, seconds, microseconds=0,
Expand Down
97 changes: 74 additions & 23 deletions plaso/parsers/cups_ipp.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
from plaso.events import time_events
from plaso.lib import errors
from plaso.lib import eventdata
from plaso.lib import timelib
from plaso.parsers import interface
from plaso.parsers import manager

Expand All @@ -38,17 +39,17 @@
# TODO: Only tested against CUPS IPP Mac OS X.


class CupsIppEvent(time_events.PosixTimeEvent):
class CupsIppEvent(time_events.TimestampEvent):
"""Convenience class for an cups ipp event."""

DATA_TYPE = u'cups:ipp:event'

def __init__(self, posix_time, timestamp_description, data_dict):
def __init__(self, timestamp, timestamp_description, data_dict):
"""Initializes the event object.
Args:
posix_time: the POSIX time value, which contains the number of seconds
since January 1, 1970 00:00:00 UTC.
timestamp: the timestamp which is an integer containing the number
of micro seconds since January 1, 1970, 00:00:00 UTC.
timestamp_description: The usage string for the timestamp value.
data_dict: Dictionary with all the pairs coming from IPP file.
user: String with the system user name.
Expand All @@ -63,7 +64,7 @@ def __init__(self, posix_time, timestamp_description, data_dict):
doc_type: String with the type of document.
data_dict: Dictionary with all the parsed data coming from the file.
"""
super(CupsIppEvent, self).__init__(posix_time, timestamp_description)
super(CupsIppEvent, self).__init__(timestamp, timestamp_description)
# TODO: Find a better solution than to have join for each attribute.
self.user = self._ListToString(data_dict.get(u'user', None))
self.owner = self._ListToString(data_dict.get(u'owner', None))
Expand Down Expand Up @@ -98,14 +99,10 @@ def _ListToString(self, values):
return

for index, value in enumerate(values):
if ',' in value:
if u',' in value:
values[index] = u'"{0:s}"'.format(value)

try:
return u', '.join(values)
except UnicodeDecodeError as exception:
logging.error(
u'Unable to parse log line, with error: {0:s}'.format(exception))
return u', '.join(values)


class CupsIppParser(interface.FileObjectParser):
Expand Down Expand Up @@ -152,17 +149,19 @@ class CupsIppParser(interface.FileObjectParser):
# Identification Groups.
GROUP_LIST = [1, 2, 4, 5, 6, 7]

# Type ID.
TYPE_GENERAL_INTEGER = 32
TYPE_INTEGER = 33
TYPE_ENUMERATION = 35
TYPE_BOOL = 34
# Type ID, per cups source file ipp-support.c.
TYPE_GENERAL_INTEGER = 0x20
TYPE_INTEGER = 0x21
TYPE_BOOL = 0x22
TYPE_ENUMERATION = 0x23
TYPE_DATETIME = 0x31

# Type of values that can be extracted.
INTEGER_8 = construct.UBInt8(u'integer')
INTEGER_32 = construct.UBInt32(u'integer')
TEXT = construct.PascalString(
u'text',
encoding='utf-8',
length_field=construct.UBInt8(u'length'))
BOOLEAN = construct.Struct(
u'boolean_value',
Expand All @@ -173,6 +172,22 @@ class CupsIppParser(interface.FileObjectParser):
construct.Padding(1),
INTEGER_32)

# This is an RFC 2579 datetime.
DATETIME = construct.Struct(
u'datetime',
construct.Padding(1),
construct.UBInt16(u'year'),
construct.UBInt8(u'month'),
construct.UBInt8(u'day'),
construct.UBInt8(u'hour'),
construct.UBInt8(u'minutes'),
construct.UBInt8(u'seconds'),
construct.UBInt8(u'deciseconds'),
construct.String(u'direction_from_utc', length=1, encoding='ascii'),
construct.UBInt8(u'hours_from_utc'),
construct.UBInt8(u'minutes_from_utc'),
)

# Name of the pair.
PAIR_NAME = construct.Struct(
u'pair_name',
Expand Down Expand Up @@ -229,24 +244,52 @@ def ParseFileObject(self, parser_mediator, file_object, **kwargs):
data_dict.setdefault(pretty_name, []).append(value)
name, value = self.ReadPair(parser_mediator, file_object)

if u'time-at-creation' in data_dict:
# TODO: Refactor to use a lookup table to do event production.
time_dict = {}
for key, value in data_dict.items():
if key.startswith(u'date-time-') or key.startswith(u'time-'):
time_dict[key] = value
del data_dict[key]

if u'date-time-at-creation' in time_dict:
event_object = CupsIppEvent(
data_dict[u'time-at-creation'][0],
time_dict[u'date-time-at-creation'][0],
eventdata.EventTimestamp.CREATION_TIME, data_dict)
parser_mediator.ProduceEvent(event_object)

if u'time-at-processing' in data_dict:
if u'date-time-at-processing' in time_dict:
event_object = CupsIppEvent(
data_dict[u'time-at-processing'][0],
time_dict[u'date-time-at-processing'][0],
eventdata.EventTimestamp.START_TIME, data_dict)
parser_mediator.ProduceEvent(event_object)

if u'time-at-completed' in data_dict:
if u'date-time-at-completed' in time_dict:
event_object = CupsIppEvent(
data_dict[u'time-at-completed'][0],
time_dict[u'date-time-at-completed'][0],
eventdata.EventTimestamp.END_TIME, data_dict)
parser_mediator.ProduceEvent(event_object)

if u'time-at-creation' in time_dict:
time_value = time_dict[u'time-at-creation'][0]
timestamp = timelib.Timestamp.FromPosixTime(time_value)
event_object = CupsIppEvent(
timestamp, eventdata.EventTimestamp.CREATION_TIME, data_dict)
parser_mediator.ProduceEvent(event_object)

if u'time-at-processing' in time_dict:
time_value = time_dict[u'time-at-processing'][0]
timestamp = timelib.Timestamp.FromPosixTime(time_value)
event_object = CupsIppEvent(
timestamp, eventdata.EventTimestamp.START_TIME, data_dict)
parser_mediator.ProduceEvent(event_object)

if u'time-at-completed' in time_dict:
time_value = time_dict[u'time-at-completed'][0]
timestamp = timelib.Timestamp.FromPosixTime(time_value)
event_object = CupsIppEvent(
timestamp, eventdata.EventTimestamp.END_TIME, data_dict)
parser_mediator.ProduceEvent(event_object)

def ReadPair(self, parser_mediator, file_object):
"""Reads an attribute name and value pair from a CUPS IPP event.
Expand Down Expand Up @@ -299,10 +342,18 @@ def ReadPair(self, parser_mediator, file_object):
elif type_id == self.TYPE_BOOL:
value = bool(self.BOOLEAN.parse_stream(file_object).integer)

elif type_id == self.TYPE_DATETIME:
datetime = self.DATETIME.parse_stream(file_object)
value = timelib.Timestamp.FromRFC2579Datetime(
datetime.year, datetime.month, datetime.day, datetime.hour,
datetime.minutes, datetime.seconds, datetime.deciseconds,
datetime.direction_from_utc, datetime.hours_from_utc,
datetime.minutes_from_utc)

else:
value = self.TEXT.parse_stream(file_object)

except (IOError, construct.FieldError):
except (IOError, UnicodeDecodeError, construct.FieldError):
logging.warning(
u'[{0:s}] Unsupported value in file: {1:s}.'.format(
self.NAME, parser_mediator.GetDisplayName()))
Expand Down
37 changes: 23 additions & 14 deletions plaso/storage/zip_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -1512,6 +1512,29 @@ def AddEventObject(self, event_object):
if not self._zipfile:
raise IOError(u'Trying to add an entry to a closed storage file.')

# We try to serialize the event object first, so we can skip some
# processing if it's invalid.
if self._serializers_profiler:
self._serializers_profiler.StartTiming(u'event_object')
try:
event_object_data = self._event_object_serializer.WriteSerialized(
event_object)
# TODO: Re-think this approach with the re-design of the storage.
# Check if the event object failed to serialize (None is returned).
if event_object_data is None:
return
except UnicodeDecodeError:
error_message = (
u'Unicode error while serializing event. It will be excluded from '
u'output. Details: Event: "{0:s}" data type: "{1:s}" '
u'parser: "{2:s}"').format(
event_object.uuid, event_object.data_type, event_object.parser)
logging.error(error_message)
return
finally:
if self._serializers_profiler:
self._serializers_profiler.StopTiming(u'event_object')

if event_object.timestamp > self._buffer_last_timestamp:
self._buffer_last_timestamp = event_object.timestamp

Expand All @@ -1535,20 +1558,6 @@ def AddEventObject(self, event_object):
parser = attributes.get(u'parser', u'unknown_parser')
self._count_parser[parser] += 1

if self._serializers_profiler:
self._serializers_profiler.StartTiming(u'event_object')

event_object_data = self._event_object_serializer.WriteSerialized(
event_object)

if self._serializers_profiler:
self._serializers_profiler.StopTiming(u'event_object')

# TODO: Re-think this approach with the re-design of the storage.
# Check if the event object failed to serialize (none is returned).
if event_object_data is None:
return

heapq.heappush(
self._buffer, (event_object.timestamp, event_object_data))
self._buffer_size += len(event_object_data)
Expand Down
1 change: 1 addition & 0 deletions tests/parsers/cups_ipp.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ def testParse(self):
self.assertEqual(
event_object.timestamp_desc,
eventdata.EventTimestamp.CREATION_TIME)

self.assertEqual(event_object.application, u'LibreOffice')
self.assertEqual(event_object.job_name, u'Assignament 1')
self.assertEqual(event_object.computer_name, u'localhost')
Expand Down

0 comments on commit 65b4421

Please sign in to comment.