MultiPart upload since cgi.FieldStorage not available in Python 3.13

Based on the work in bottlepy#1438 Signed-off-by: Oz Tiram <[email protected]>
veilchen-web · May 25, 2024 · 9861d04 · 9861d04
1 parent 2f11bc8
commit 9861d04
Show file tree

Hide file tree

Showing 4 changed files with 548 additions and 57 deletions.
diff --git a/test/test_environ.py b/test/test_environ.py
@@ -349,7 +349,6 @@ def test_multipart(self):
         self.assertEqual('value1', request.POST['field1'])
         self.assertTrue('field1' not in request.files)
         self.assertEqual('value1', request.forms['field1'])
-        print(request.forms.dict, request.forms.recode_unicode)
         self.assertEqual('万难', request.forms['field2'])
         self.assertEqual(touni('万难'), request.forms.field2)
         # Field (multi)

diff --git a/test/test_multipart.py b/test/test_multipart.py
@@ -0,0 +1,257 @@
+# -*- coding: utf-8 -*-
+import unittest
+import base64
+import sys, os.path, tempfile
+from io import BytesIO
+
+import veilchen
+
+class BaseMultipartTest(unittest.TestCase):
+    ''' Shared fixture for the multipart tests: collects a raw request
+        body in self.data and offers helpers to parse it and to check
+        the resulting parts. '''
+
+    def setUp(self):
+        self.data = BytesIO()  # raw request body, filled through write()
+        self.parts = None      # result of the last successful parse()
+
+    def write(self, *lines):
+        ''' Append every item of `lines`, converted with veilchen.tob(),
+            to the request body. '''
+        for line in lines:
+            self.data.write(veilchen.tob(line))
+
+    def parse(self, ctype=None, clen=-1, **kwargs):
+        ''' Parse the collected body and return the list of parts.
+
+            `ctype` is the Content-Type header value the boundary is taken
+            from; a falsy value falls back to
+            "multipart/form-data; boundary=foo". `clen` is handed to
+            veilchen._MultipartParser as third positional argument and
+            `kwargs` are forwarded unchanged. The returned list is also
+            kept in self.parts so that assertFile() and assertForm() can
+            inspect it. '''
+        self.data.seek(0)
+        h = veilchen._parse_http_header(ctype or "multipart/form-data; boundary=foo")
+        boundary = h[0][1].get("boundary")
+        parser = veilchen._MultipartParser(self.data, boundary, clen, **kwargs)
+        self.parts = list(parser.parse())
+        return self.parts
+
+    def assertFile(self, name, filename, ctype, data):
+        ''' Assert that the first part called `name` carries the given
+            filename, content type and payload. Fails if there is no part
+            with that name. '''
+        for part in self.parts:
+            if part.name != name: continue
+            self.assertEqual(part.filename, filename)
+            self.assertEqual(part.content_type, ctype)
+            self.assertEqual(part.file.read(), veilchen.tob(data))
+            break
+        else:
+            self.fail("Field %s not found" % name)
+
+    def assertForm(self, name, data):
+        ''' Assert that the first part called `name` is a plain form field
+            (no filename, no content type) whose value equals `data`.
+            Fails if there is no part with that name. '''
+        for part in self.parts:
+            if part.name != name: continue
+            self.assertEqual(part.filename, None)
+            self.assertEqual(part.content_type, None)
+            self.assertEqual(part.value, data)
+            break
+        else:
+            self.fail("Field %s not found" % name)
+
+
+class TestHeaderParser(BaseMultipartTest):
+    ''' Checks for the header option parser veilchen._parse_http_header. '''
+
+    def test_options_parser(self):
+        ''' Option names come back lower-cased, escaped double quotes are
+            unescaped and the other backslashes are kept as they are. '''
+        # (raw header value, expected "filename" option)
+        cases = [
+            ('form-data; name="Test"; filename="Test.txt"', "Test.txt"),
+            ('form-data; name="Test"; FileName="Te\\"st.txt"', "Te\"st.txt"),
+            ('form-data; name="Test"; filename="C:\\test\\bla.txt"', "C:\\test\\bla.txt"),
+            ('form-data; name="Test"; filename="\\\\test\\bla.txt"', "\\\\test\\bla.txt"),
+        ]
+        for header, filename in cases:
+            wanted = [('form-data', {"name": "Test", "filename": filename})]
+            self.assertEqual(veilchen._parse_http_header(header), wanted)
+
+
+class TestMultipartParser(BaseMultipartTest):
+    ''' Tests for the line iterator and the part parsing of
+        veilchen._MultipartParser on well-formed input. '''
+
+    def assertIterline(self, data, *expected, **options):
+        ''' Assert that _lineiter() over `data` yields exactly the
+            (line, newline) pairs given in `expected`. `options` are
+            passed on to the parser constructor. '''
+        self.assertEqual(
+            list(veilchen._MultipartParser(BytesIO(veilchen.tob(data)), 'foo', **options)._lineiter()),
+            [(veilchen.tob(l), veilchen.tob(nl)) for l,nl in expected])
+
+    def test_iterlines(self):
+        # Only "\r\n" ends a line; the bare "\n" stays inside the line.
+        self.assertIterline('abc\ndef\r\nghi', ('abc\ndef','\r\n'), ('ghi', ''))
+
+    def test_iterlines_limit(self):
+        # content_length=10 cuts the input after the "g" of "ghi".
+        self.assertIterline('abc\ndef\r\nghi', ('abc\ndef','\r\n'), ('g', ''), content_length=10)
+        # content_length=8 cuts between "\r" and "\n": no line terminator is reported.
+        self.assertIterline('abc\ndef\r\nghi', ('abc\ndef\r',''), content_length=8)
+
+    def test_fuzzy_lineiter(self):
+        """ Test all possible buffer sizes """
+        minbuflen = 9 # boundary size of '--foo--\r\n'
+        data = b'data\rdata\ndata\r\ndata\n\rdata\r\n'.replace(b'data', b'X'*minbuflen*2)
+        # data ends with "\r\n", so the last split item is empty and dropped.
+        lines = data.split(b"\r\n")[:-1]
+        for tail in (b"", b"tail"):
+            for buffer_size in range(minbuflen, len(data+tail)+1):
+                splits = list(veilchen._MultipartParser(
+                    BytesIO(data+tail), 'foo',
+                    buffer_size=buffer_size)._lineiter())
+                partial = b""
+                merged = []
+                for part, nl in splits:
+                    self.assertTrue(nl in (b"", b"\r\n"))
+                    # A chunk without terminator must fill the buffer,
+                    # unless it is the unterminated tail.
+                    self.assertTrue(len(part) >= buffer_size or nl or part == tail)
+                    partial += part
+                    if nl:
+                        merged.append(partial)
+                        partial = b""
+                # Re-joined chunks must give back the original lines and tail.
+                self.assertEqual(merged, lines)
+                self.assertEqual(tail, partial)
+
+    def test_big_file(self):
+        ''' If the size of an uploaded part exceeds memfile_limit,
+            it is written to disk. '''
+        test_file = 'abc'*1024
+        boundary = '---------------------------186454651713519341951581030105'
+        # file1 is exactly memfile_limit bytes long, file2 one byte longer,
+        # file3 twice as long.
+        request = BytesIO(veilchen.tob('\r\n').join(map(veilchen.tob,[
+        '--' + boundary,
+        'Content-Disposition: form-data; name="file1"; filename="random.png"',
+        'Content-Type: image/png', '', test_file, '--' + boundary,
+        'Content-Disposition: form-data; name="file2"; filename="random.png"',
+        'Content-Type: image/png', '', test_file + 'a', '--' + boundary,
+        'Content-Disposition: form-data; name="file3"; filename="random.png"',
+        'Content-Type: image/png', '', test_file*2, '--'+boundary+'--',''])))
+        parts = list(veilchen._MultipartParser(request, boundary, memfile_limit=len(test_file)).parse())
+        p = {p.name: p for p in parts}
+        try:
+            self.assertEqual(p.get('file1').file.read(), veilchen.tob(test_file))
+            self.assertTrue(p.get('file1').is_buffered())
+            self.assertEqual(p.get('file2').file.read(), veilchen.tob(test_file + 'a'))
+            self.assertFalse(p.get('file2').is_buffered())
+            self.assertEqual(p.get('file3').file.read(), veilchen.tob(test_file*2))
+            self.assertFalse(p.get('file3').is_buffered())
+        finally:
+            # Close every part even when an assertion above failed.
+            for part in parts:
+                part.close()
+
+    def test_file_seek(self):
+        ''' The file object should be readable without a seek(0). '''
+        test_file = 'abc'*1024
+        boundary = '---------------------------186454651713519341951581030105'
+        request = BytesIO(veilchen.tob('\r\n').join(map(veilchen.tob,[
+        '--' + boundary,
+        'Content-Disposition: form-data; name="file1"; filename="random.png"',
+        'Content-Type: image/png', '', test_file, '--' + boundary + '--',''])))
+        p = list(veilchen._MultipartParser(request, boundary).parse())
+        self.assertEqual(p[0].file.read(), veilchen.tob(test_file))
+        # .value must still give the full content after file.read() above.
+        self.assertEqual(p[0].value, test_file)
+
+    def test_unicode_value(self):
+        ''' The .value property always returns unicode '''
+        test_file = 'abc'*1024
+        boundary = '---------------------------186454651713519341951581030105'
+        request = BytesIO(veilchen.tob('\r\n').join(map(veilchen.tob,[
+        '--' + boundary,
+        'Content-Disposition: form-data; name="file1"; filename="random.png"',
+        'Content-Type: image/png', '', test_file, '--' + boundary + '--',''])))
+        p = list(veilchen._MultipartParser(request, boundary).parse())
+        self.assertEqual(p[0].file.read(), veilchen.tob(test_file))
+        self.assertEqual(p[0].value, test_file)
+        # Having an encode() method is used as the marker for a text string.
+        self.assertTrue(hasattr(p[0].value, 'encode'))
+
+    def test_multiline_header(self):
+        ''' HTTP allows headers to be multiline. '''
+        test_file = veilchen.tob('abc'*1024)
+        test_text = u'Test text\n with\r\n ümläuts!'
+        boundary = '---------------------------186454651713519341951581030105'
+        # The continuation lines start with a tab (first part) and with a
+        # space (second part).
+        request = BytesIO(veilchen.tob('\r\n').join(map(veilchen.tob,[
+        '--' + boundary,
+        'Content-Disposition: form-data;',
+        '\tname="file1"; filename="random.png"',
+        'Content-Type: image/png', '', test_file, '--' + boundary,
+        'Content-Disposition: form-data;',
+        ' name="text"', '', test_text,
+        '--' + boundary + '--',''])))
+        p = list(veilchen._MultipartParser(request, boundary, charset='utf8').parse())
+        self.assertEqual(p[0].name, "file1")
+        self.assertEqual(p[0].file.read(), test_file)
+        self.assertEqual(p[0].filename, 'random.png')
+        self.assertEqual(p[1].name, "text")
+        self.assertEqual(p[1].value, test_text)
+
+
+class TestBrokenMultipart(BaseMultipartTest):
+    ''' Malformed or over-sized request bodies. Most tests expect
+        veilchen.MultipartError to be raised while parsing. '''
+
+    def assertMPError(self, **ka):
+        ''' Assert that self.parse(**ka) raises veilchen.MultipartError. '''
+        self.assertRaises(veilchen.MultipartError, self.parse, **ka)
+
+    def test_big_boundary(self):
+        # NOTE(review): the body is empty and the boundary is the short
+        # default "foo", so the error may not be caused by buffer_size at
+        # all - confirm what this test is meant to cover.
+        self.assertMPError(buffer_size=1024*3)
+
+    def test_missing_content_type(self):
+        # NOTE(review): parse() replaces a falsy ctype with its default
+        # content type, so the empty string never reaches the header
+        # parser; the body is empty here.
+        self.assertMPError(ctype="")
+
+    def test_unsupported_content_type(self):
+        ''' A content type other than multipart/form-data, given without
+            a boundary option. '''
+        self.assertMPError(ctype='multipart/fantasy')
+
+    def test_missing_boundary(self):
+        ''' multipart/form-data without a boundary option. '''
+        self.assertMPError(ctype="multipart/form-data")
+
+    def test_no_terminator(self):
+        ''' The body ends inside the part content, without any closing
+            boundary. '''
+        self.write('--foo\r\n',
+                   'Content-Disposition: form-data; name="file1"; filename="random.png"\r\n',
+                   'Content-Type: image/png\r\n', '\r\n', 'abc')
+        self.assertMPError()
+
+    def test_no_newline_after_content(self):
+        ''' The closing boundary follows the content directly, without a
+            line break in between. '''
+        self.write('--foo\r\n',
+                   'Content-Disposition: form-data; name="file1"; filename="random.png"\r\n',
+                   'Content-Type: image/png\r\n', '\r\n', 'abc', '--foo--')
+        self.assertMPError()
+
+    def test_no_newline_after_middle_content(self):
+        ''' A boundary in the middle that is not preceded by a line break
+            is expected to be treated as content: a single part comes
+            back and its value contains the headers of the second part. '''
+        self.write('--foo\r\n',
+                   'Content-Disposition: form-data; name="file1"; filename="random.png"\r\n',
+                   'Content-Type: image/png\r\n', '\r\n', 'abc', '--foo\r\n',
+                   'Content-Disposition: form-data; name="file2"; filename="random.png"\r\n',
+                   'Content-Type: image/png\r\n', '\r\n', 'abc\r\n', '--foo--')
+        parts = self.parse()
+        self.assertEqual(len(parts), 1)
+        self.assertTrue('name="file2"' in parts[0].value)
+
+    def test_preamble_before_start_boundary(self):
+        ''' Text in front of the first boundary is expected to be skipped;
+            the part after it is parsed normally. '''
+        self.write('Preamble\r\n', '--foo\r\n',
+                   'Content-Disposition: form-data; name="file1"; filename="random.png"\r\n',
+                   'Content-Type: image/png\r\n', '\r\n', 'abc\r\n', '--foo--')
+        parts = self.parse()
+        self.assertEqual(parts[0].file.read(), veilchen.tob('abc'))
+        self.assertEqual(parts[0].filename, 'random.png')
+        self.assertEqual(parts[0].name, 'file1')
+        self.assertEqual(parts[0].content_type, 'image/png')
+
+    def test_no_start_boundary(self):
+        ''' The body only uses boundaries ("bar", "nonsense") that differ
+            from the "foo" boundary parse() asks for. '''
+        self.write('--bar\r\n','--nonsense\r\n',
+                   'Content-Disposition: form-data; name="file1"; filename="random.png"\r\n',
+                   'Content-Type: image/png\r\n', '\r\n', 'abc\r\n', '--nonsense--')
+        self.assertMPError()
+
+    def test_disk_limit(self):
+        ''' A 3072 byte payload with memfile_limit=0 and disk_limit=1024
+            must be rejected. '''
+        self.write('--foo\r\n',
+                   'Content-Disposition: form-data; name="file1"; filename="random.png"\r\n',
+                   'Content-Type: image/png\r\n', '\r\n', 'abc'*1024+'\r\n', '--foo--')
+        self.assertMPError(memfile_limit=0, disk_limit=1024)
+
+    def test_mem_limit(self):
+        ''' Two payloads of 3072 bytes each with mem_limit=3072 must be
+            rejected. '''
+        self.write('--foo\r\n',
+                   'Content-Disposition: form-data; name="file1"; filename="random.png"\r\n',
+                   'Content-Type: image/png\r\n', '\r\n', 'abc'*1024+'\r\n', '--foo\r\n',
+                   'Content-Disposition: form-data; name="file2"; filename="random.png"\r\n',
+                   'Content-Type: image/png\r\n', '\r\n', 'abc'*1024+'\r\n', '--foo--')
+        self.assertMPError(mem_limit=1024*3)
+
+    def test_invalid_header(self):
+        ''' A header line without a colon ("Bad header") inside the part
+            headers. '''
+        self.write('--foo\r\n',
+                   'Content-Disposition: form-data; name="file1"; filename="random.png"\r\n',
+                   'Content-Type: image/png\r\n',
+                   'Bad header\r\n', '\r\n', 'abc'*1024+'\r\n', '--foo--')
+        self.assertMPError()
+
+    def test_content_length_to_small(self):
+        ''' The part declares Content-Length 111 but carries a 3072 byte
+            payload. '''
+        self.write('--foo\r\n',
+                   'Content-Disposition: form-data; name="file1"; filename="random.png"\r\n',
+                   'Content-Type: image/png\r\n',
+                   'Content-Length: 111\r\n', '\r\n', 'abc'*1024+'\r\n', '--foo--')
+        self.assertMPError()
+
+    def test_no_disposition_header(self):
+        ''' A part that has a Content-Type but no Content-Disposition
+            header. '''
+        self.write('--foo\r\n',
+                   'Content-Type: image/png\r\n', '\r\n', 'abc'*1024+'\r\n', '--foo--')
+        self.assertMPError()
+
diff --git a/test/tools.py b/test/tools.py
@@ -164,17 +164,17 @@ def multipart_environ(fields, files):
     boundary = '--' + boundary
     body = ''
     for name, value in fields:
-        body += boundary + '\n'
-        body += 'Content-Disposition: form-data; name="%s"\n\n' % name
-        body += value + '\n'
+        body += boundary + '\r\n'
+        body += 'Content-Disposition: form-data; name="%s"\r\n\r\n' % name
+        body += value + '\r\n'
     for name, filename, content in files:
         mimetype = str(mimetypes.guess_type(filename)[0]) or 'application/octet-stream'
-        body += boundary + '\n'
-        body += 'Content-Disposition: file; name="%s"; filename="%s"\n' % \
+        body += boundary + '\r\n'
+        body += 'Content-Disposition: file; name="%s"; filename="%s"\r\n' % \
              (name, filename)
-        body += 'Content-Type: %s\n\n' % mimetype
-        body += content + '\n'
-    body += boundary + '--\n'
+        body += 'Content-Type: %s\r\n\r\n' % mimetype
+        body += content + '\r\n'
+    body += boundary + '--\r\n'
     if isinstance(body, str):
         body = body.encode('utf8')
     env['CONTENT_LENGTH'] = str(len(body))