Skip to content

Commit

Permalink
fixed attachment name decoding for UTF-8 names which are encoded in A…
Browse files Browse the repository at this point in the history
…SCII
  • Loading branch information
wahlflo committed Apr 30, 2021
1 parent 99f1bb0 commit 3489e82
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 5 deletions.
23 changes: 19 additions & 4 deletions eml_analyzer/cli_script.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
from email.message import Message
import re
from cli_formatter.output_formatting import colorize_string, Color, warning, error, info, print_headline_banner
import base64
import binascii


def show_header(parsed_eml: Message):
Expand Down Expand Up @@ -142,7 +144,7 @@ def show_attachments(parsed_eml: Message):
attachments = list()
for child in parsed_eml.walk():
if child.get_filename() is not None:
attachment_filename = _get_save_filename_from_attachment(attachment=child)
attachment_filename = _get_printable_attachment_filename(attachment=child)
attachments.append((attachment_filename, str(child.get_content_type()), str(child.get_content_disposition())))
if len(attachments) == 0:
info('E-Mail contains no attachments')
Expand Down Expand Up @@ -171,7 +173,7 @@ def extract_attachment(parsed_eml: Message, attachment_number: int, output_path:
error('Attachment {} could not be found'.format(attachment_number))
return

attachment_filename = _get_save_filename_from_attachment(attachment=attachment)
attachment_filename = _get_printable_attachment_filename(attachment=attachment)

info('Found attachment [{}] "{}"'.format(attachment_number, attachment_filename))

Expand Down Expand Up @@ -202,7 +204,7 @@ def extract_all_attachments(parsed_eml: Message, path: str or None):
continue
counter += 1

attachment_filename = _get_save_filename_from_attachment(attachment=child)
attachment_filename = _get_printable_attachment_filename(attachment=child)

output_path = os.path.join(path, attachment_filename)

Expand All @@ -214,9 +216,12 @@ def extract_all_attachments(parsed_eml: Message, path: str or None):
info('Attachment [{}] "{}" extracted to {}'.format(counter, attachment_filename, output_path))


def _get_save_filename_from_attachment(attachment: Message) -> str:
def _get_printable_attachment_filename(attachment: Message) -> str:
""" returns a valid filename for a given attachment name """
attachment_name = attachment.get_filename()

attachment_name = _decode_acii_encoded_utf8_string(string=attachment_name)

additional_allowed_chars = {'_', '.', '(', ')', '-', ' '}
clean_name = ''
for x in attachment_name:
Expand All @@ -227,6 +232,16 @@ def _get_save_filename_from_attachment(attachment: Message) -> str:
return clean_name


def _decode_acii_encoded_utf8_string(string: str) -> str:
""" decodes ASCII strings which are encoded like: name := "?UTF-8?B?" + base64_encode(filename) + "?=" """
for match in list(re.finditer(pattern=r'=\?utf-8\?B\?(.+?)\?=', string=string)):
try:
string = string.replace(match.group(0), base64.b64decode(match.group(1)).decode('utf-8'))
except binascii.Error:
pass
return string


def main():
argument_parser = argparse.ArgumentParser(usage='emlAnalyzer [OPTION]... -i FILE', description='A cli script to analyze an E-Mail in the eml format for viewing the header, extracting attachments etc.')
argument_parser.add_argument('-i', '--input', help="path to the eml-file (is required)", type=str)
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

setuptools.setup(
name="eml-analyzer",
version="1.0.2",
version="1.0.3",
author="Florian Wahl",
author_email="[email protected]",
description="A cli script to analyze an E-Mail in the eml format for viewing the header, extracting attachments etc.",
Expand Down

0 comments on commit 3489e82

Please sign in to comment.