From b672599c9292f343a4da693d3102f2ec955bd548 Mon Sep 17 00:00:00 2001 From: Elias Kunnas Date: Sun, 1 Jan 2017 11:53:39 -0300 Subject: [PATCH 1/2] Work around crazy emails with non-base64 encoded attachments GMail IMAP push will get into a weird state if it encounters an email which is encoded in this manner. The connection will be reset after a long timeout after encountering this binary crap. I suspect that an earlier version of the GMail server software, or some non-IMAP import path, allows (or once allowed) such messages in, and yet it is not possible to upload them back verbatim. For my purposes, it suffices to skip these rare species, but in general the email parts should be re-encoded as base64, or the problematic invalidly encoded attachments should be removed. The illegally encoded binary emails are detected here by looking for the PNG magic string with bytes that SHOULD NOT appear in any legitimate email data. A screenshot of one of the affected emails is at https://imgur.com/a/i09Xa. --- src/gmv/imap_utils.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/gmv/imap_utils.py b/src/gmv/imap_utils.py index f30be16c..0b823b43 100755 --- a/src/gmv/imap_utils.py +++ b/src/gmv/imap_utils.py @@ -799,7 +799,11 @@ def push_data(self, a_folder, a_body, a_flags, a_internal_time): res = None try: #a_body = self._clean_email_body(a_body) - res = self.server.append(a_folder, a_body, a_flags, a_internal_time) + if b'\xc2\x89PNG' in a_body: + raise PushEmailError("Skip bad email with binary data. 
Quarantine this email.", quarantined = True) + else: + res = self.server.append(a_folder, a_body, a_flags, a_internal_time) + except imaplib.IMAP4.abort, err: # handle issue when there are invalid characters (This is do to the presence of null characters) if str(err).find("APPEND => Invalid character in literal") >= 0: From 0826037da74dede5079358fdd812482cc3291b66 Mon Sep 17 00:00:00 2001 From: Elias Kunnas Date: Thu, 4 May 2017 19:46:18 +0300 Subject: [PATCH 2/2] Generalize checking against bad base64 data This commit tests all base64-encoded parts of emails to push. This generalizes the previous ad hoc method which only worked for detecting the PNG magic header anywhere in the message. If an email's parts/payloads claim to be base64 encoded but contain characters outside the base64 alphabet, the email is never pushed to the server. The erroneous emails are NOT sent to quarantine since they are assumed/observed to be rare and can be detected client side (and notably CANNOT be detected server-side as such because they break the protocol contract). --- src/gmv/imap_utils.py | 78 +++++++++++++++++++++++++++++++++++++------ 1 file changed, 67 insertions(+), 11 deletions(-) diff --git a/src/gmv/imap_utils.py b/src/gmv/imap_utils.py index 0b823b43..f4c8ce4d 100755 --- a/src/gmv/imap_utils.py +++ b/src/gmv/imap_utils.py @@ -36,23 +36,76 @@ import gmv.gmvault_utils as gmvault_utils import gmv.mod_imap as mimap +import email.parser + LOG = log_utils.LoggerFactory.get_logger('imap_utils') + +re_valid_base64 = re.compile('^[A-Za-z0-9+/=\r\n]*$') +def is_valid_base64_email_payload(payload): + """Check if a base64-encoded email payload is valid + + A payload is a part of a multipart message for instance. + + NOTE: There is no base64 alphabet validating function + in the standard library so we use our own. + We only permit the standard base64 alphabet and CRLF. 
+ """ + re_match = re_valid_base64.match(payload) + return re_match is not None + + +def check_email_payload_encodings(email_body): + """ + Check whether emails-to-push have correctly specified encodings. + If there is binary data where base64 is expected, gmail will break + and interpret the ("random") binary data as part of the SMTP or + IMAP protocol metadata or something. + + See: https://github.com/gaubert/gmvault/pull/283 + """ + parser = email.parser.Parser() + message = parser.parsestr(email_body) + + for submsg in message.walk(): + # We can only check non-multipart, parts, for validity. + if submsg.is_multipart(): + continue + + cte = submsg.get('content-transfer-encoding', '').lower() + + # Only "falsely" base64-encoded emails have had problems with gmail servers so far, + # so we don't bother trying to validate the rest. + if cte != 'base64': + continue + + if not is_valid_base64_email_payload(submsg._payload): + raise PushEmailError( + "Bad data encoding: base64-encoded data contains non-base64 characters.", + permanent = True, + ) + + class PushEmailError(Exception): """ PushEmail Error """ - def __init__(self, a_msg, quarantined = False): + def __init__(self, a_msg, quarantined = False, permanent = False): """ Constructor """ super(PushEmailError, self).__init__(a_msg) self._in_quarantine = quarantined + self._permanent = permanent def quarantined(self): """ Get email to quarantine """ return self._in_quarantine + def is_permanent(self): + """ Is the error permanent? Should we attempt to retry? """ + return self._permanent + class LabelError(Exception): """ LabelError. Exception send when there is an error when adding labels to message @@ -134,12 +187,17 @@ def wrapper(*args, **kwargs): #pylint:disable=C0111,R0912 LOG.debug("error message = %s. traceback:%s" % (p_err, gmvault_utils.get_exception_traceback())) - if nb_tries[0] < a_nb_tries: - LOG.critical("Cannot reach the Gmail server. Wait %s second(s) and retrying." 
% (m_sleep_time[0])) + if p_err.is_permanent(): + LOG.critical('Permanent error in email message: {}'.format(p_err)) + break + else: - LOG.critical("Stop retrying, tried too many times ...") - - reconnect(args[0], nb_tries, a_nb_tries, p_err, m_sleep_time) + if nb_tries[0] < a_nb_tries: + LOG.critical("Cannot reach the Gmail server. Wait %s second(s) and retrying." % (m_sleep_time[0])) + else: + LOG.critical("Stop retrying, tried too many times ...") + + reconnect(args[0], nb_tries, a_nb_tries, p_err, m_sleep_time) except imaplib.IMAP4.abort, err: #abort is recoverable and error is not @@ -796,13 +854,11 @@ def push_data(self, a_folder, a_body, a_flags, a_internal_time): #msg = "a_folder = %s" % (a_folder.encode('utf-8')) #msg = msg.encode('utf-8') #print(msg) + check_email_payload_encodings(a_body) + res = None try: - #a_body = self._clean_email_body(a_body) - if b'\xc2\x89PNG' in a_body: - raise PushEmailError("Skip bad email with binary data. Quarantine this email.", quarantined = True) - else: - res = self.server.append(a_folder, a_body, a_flags, a_internal_time) + res = self.server.append(a_folder, a_body, a_flags, a_internal_time) except imaplib.IMAP4.abort, err: # handle issue when there are invalid characters (This is do to the presence of null characters)