Skip to content

Commit

Permalink
Added: Correctly handling unsupported content (see: http://doc.qt.io/…
Browse files Browse the repository at this point in the history
  • Loading branch information
starrify committed Dec 27, 2015
1 parent 2b1a390 commit 1853125
Show file tree
Hide file tree
Showing 3 changed files with 86 additions and 2 deletions.
54 changes: 52 additions & 2 deletions splash/browser_tab.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
from splash.qtutils import (OPERATION_QT_CONSTANTS, WrappedSignal, qt2py,
qurl2ascii, to_qurl)
from splash.render_options import validate_size_str
from splash.qwebpage import SplashQWebPage, SplashQWebView
from splash.qwebpage import SplashQWebPage, SplashQWebView, RenderErrorInfo
from splash.exceptions import JsError, OneShotCallbackError
from splash.utils import to_bytes, escape_js

Expand Down Expand Up @@ -64,6 +64,9 @@ def __init__(self, network_manager, splash_proxy_factory, verbosity,
self._callback_proxies_to_cancel = weakref.WeakSet()
self._js_console = None
self._autoload_scripts = []
self._is_unsupported_content = False
self._unsupported_content_reply = None
self._load_finished_after_unsupported_content_ready = False

self.logger = _BrowserTabLogger(uid=self._uid, verbosity=verbosity)
self._init_webpage(verbosity, network_manager, splash_proxy_factory,
Expand Down Expand Up @@ -133,6 +136,8 @@ def _setup_webpage_events(self):
self.web_page.mainFrame().loadFinished.connect(self._on_load_finished)
self.web_page.mainFrame().urlChanged.connect(self._on_url_changed)
self.web_page.mainFrame().javaScriptWindowObjectCleared.connect(self._on_javascript_window_object_cleared)
self.web_page.setForwardUnsupportedContent(True)
self.web_page.unsupportedContent.connect(self._on_unsupported_content)
self.logger.add_web_page(self.web_page)

def return_result(self, result):
Expand Down Expand Up @@ -372,6 +377,15 @@ def _on_load_finished(self, ok):
This callback is called for all web_page.mainFrame()
loadFinished events.
"""
if self._is_unsupported_content:
if self._unsupported_content_reply.isRunning():
# XXX: We'll come back later when download finishes
self.logger.log(
'Still receving unsupported content', min_level=3)
return
else:
self._load_finished_after_unsupported_content_ready = True
self.logger.log('Unsupported content received', min_level=3)
if self.web_page.maybe_redirect(ok):
self.logger.log("Redirect or other non-fatal error detected", min_level=2)
return
Expand Down Expand Up @@ -419,7 +433,11 @@ def _on_content_ready(self, ok, callback, errback, callback_id):
"""
This method is called when a QWebPage finishes loading its contents.
"""
if self.web_page.maybe_redirect(ok):
if self._is_unsupported_content:
if self._unsupported_content_reply.isRunning():
# XXX: We'll come back later when download finishes
return
elif self.web_page.maybe_redirect(ok):
# XXX: It assumes loadFinished will be called again because
# redirect happens. If redirect is detected improperly,
# loadFinished won't be called again, and Splash will return
Expand All @@ -431,6 +449,16 @@ def _on_content_ready(self, ok, callback, errback, callback_id):

if self.web_page.is_ok(ok):
callback()
elif self._is_unsupported_content:
# XXX: Error downloading unsupported content.
# `self.web_page.error_info` shall be `None` now
error_info = RenderErrorInfo(
'Network',
int(self._unsupported_content_reply.error()),
six.text_type(self._unsupported_content_reply.errorString()),
six.text_type(self._unsupported_content_reply.url().url())
)
errback(error_info)
elif self.web_page.error_loading(ok):
# XXX: maybe return a meaningful error page instead of generic
# error message?
Expand Down Expand Up @@ -505,6 +533,28 @@ def _on_url_changed(self, url):
self.web_page.har.store_redirect(six.text_type(url.toString()))
self._cancel_timers(self._timers_to_cancel_on_redirect)

def _on_unsupported_content_finished(self):
    """
    Handle completion of the network reply that carries unsupported
    content.

    Unless loadFinished has already been processed after the download
    completed, re-emit the main frame's loadFinished signal: with ``True``
    when the stored reply reports no error, with ``False`` otherwise.
    """
    self.logger.log('Unsupported content finished', min_level=3)
    if self._load_finished_after_unsupported_content_ready:
        # XXX: The unsupported content reply might have finished before
        # the original loadFinished signal emits. In such cases we do not
        # want the same signal twice.
        return
    # A falsy error() value means the download completed successfully.
    download_ok = not self._unsupported_content_reply.error()
    self.web_page.mainFrame().loadFinished.emit(download_ok)

def _on_unsupported_content(self, reply):
    """
    Slot for the web page's ``unsupportedContent`` signal.

    Remembers ``reply`` and marks the tab as dealing with unsupported
    content. If the reply has already finished, the completion handler
    runs right away; otherwise it is connected to the reply's
    ``finished`` signal.
    """
    self.logger.log('Unsupported content detected', min_level=3)
    self._unsupported_content_reply = reply
    self._is_unsupported_content = True
    if not reply.isFinished():
        # Download still in progress: handle completion when it is signalled.
        reply.finished.connect(self._on_unsupported_content_finished)
        return
    # Already finished. The content might be very short.
    self.logger.log('Unsupported content already finished', min_level=3)
    self._on_unsupported_content_finished()

def run_js_file(self, filename, handle_errors=True):
"""
Load JS library from file ``filename`` to the current frame.
Expand Down
19 changes: 19 additions & 0 deletions splash/tests/mockserver.py
Original file line number Diff line number Diff line change
Expand Up @@ -743,6 +743,23 @@ def render_GET(self, request):
return b"ok"


class RawBytes(Resource):
    """
    Mock resource that writes a hand-built HTTP response directly to the
    connection's transport and then closes the connection.

    Query arguments:

    * ``length`` - number of ``0`` bytes sent as the body (default 1024);
    * ``claim_length`` - value announced in the ``Content-Length`` header
      (defaults to the real body length). A value that differs from
      ``length`` yields a response whose declared and actual sizes
      disagree.
    """

    def render_GET(self, request):
        query = request.args
        actual_size = int(query.get(b'length', [1024])[0])
        payload = b'0' * actual_size
        declared_size = int(query.get(b'claim_length', [actual_size])[0])

        # Status line, a single header, a blank separator line, the body.
        length_header = ('Content-Length: %d' % declared_size).encode('utf8')
        raw_response = b'\n'.join(
            [b'HTTP/1.1 200 OK', length_header, b'', payload])

        transport = request.channel.transport
        transport.write(raw_response)
        transport.loseConnection()
        # The response was written by hand above; presumably NOT_DONE_YET
        # keeps the framework from writing one of its own (TODO confirm).
        return NOT_DONE_YET  # Already done


class Index(Resource):
isLeaf = True

Expand Down Expand Up @@ -820,6 +837,8 @@ def __init__(self, http_port, https_port, proxy_port):
self.putChild(b"bad-content-type", InvalidContentTypeResource())
self.putChild(b"bad-content-type2", InvalidContentTypeResource2())

self.putChild(b"raw-bytes", RawBytes())

self.putChild(b"jsredirect", JsRedirect())
self.putChild(b"jsredirect-to", JsRedirectTo())
self.putChild(b"jsredirect-slowimage", JsRedirectSlowImage())
Expand Down
15 changes: 15 additions & 0 deletions splash/tests/test_render.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,21 @@ def test_invalid_wait(self):
'wait': wait})
self.assertStatusCode(r, 400)

def test_unsupported_content(self):
    """
    Request the ``raw-bytes`` mock resource with short and long bodies,
    both with a matching and with an overstated declared length, and
    check the status code returned for each.
    """
    expected_status_by_path = (
        # Short body (Can be received together with the headers)
        ("raw-bytes?length=16", 200),
        # Short body with error
        ("raw-bytes?length=16&claim_length=100", 502),
        # Long body (Can't be received together with the headers)
        ("raw-bytes?length=10000", 200),
        # Long body with error
        ("raw-bytes?length=10000&claim_length=20000", 502),
    )
    for path, expected_status in expected_status_by_path:
        target = self.mockurl(path)
        response = self.request({"url": target})
        self.assertStatusCode(response, expected_status)

@pytest.mark.skipif(
not qt_551_plus(),
reason="resource_timeout doesn't work in Qt5 < 5.5.1. See issue #269 for details."
Expand Down

0 comments on commit 1853125

Please sign in to comment.