diff --git a/src/sentry/features/temporary.py b/src/sentry/features/temporary.py index d0c7f94f813d06..dc4354873e5975 100644 --- a/src/sentry/features/temporary.py +++ b/src/sentry/features/temporary.py @@ -593,6 +593,8 @@ def register_temporary_features(manager: FeatureManager) -> None: manager.add("organizations:sentry-app-manual-token-refresh", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=False) # Enables Conduit demo endpoint and UI manager.add("organizations:conduit-demo", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=True) + # Enable hard timeout alarm for webhooks + manager.add("organizations:sentry-app-webhook-hard-timeout", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=False) # Enables organization access to the new notification platform manager.add("organizations:notification-platform.internal-testing", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=True) diff --git a/src/sentry/options/defaults.py b/src/sentry/options/defaults.py index dee811fc271b34..f247831e82faf1 100644 --- a/src/sentry/options/defaults.py +++ b/src/sentry/options/defaults.py @@ -2553,6 +2553,13 @@ flags=FLAG_AUTOMATOR_MODIFIABLE, ) +# Hard timeout for webhook requests to prevent indefinite hangs +register( + "sentry-apps.webhook.hard-timeout.sec", + default=5.0, + flags=FLAG_AUTOMATOR_MODIFIABLE, +) + # Enables statistical detectors for a project register( "statistical_detectors.enable", diff --git a/src/sentry/sentry_apps/api/endpoints/installation_details.py b/src/sentry/sentry_apps/api/endpoints/installation_details.py index f2ada20c049c78..8e06393a72836a 100644 --- a/src/sentry/sentry_apps/api/endpoints/installation_details.py +++ b/src/sentry/sentry_apps/api/endpoints/installation_details.py @@ -45,19 +45,9 @@ def get(self, request: Request, installation) -> Response: def delete(self, request: Request, installation) -> Response: sentry_app_installation = SentryAppInstallation.objects.get(id=installation.id) - with transaction.atomic(using=router.db_for_write(SentryAppInstallation)): - try: - assert request.user.is_authenticated, ( - "User must be authenticated to delete installation" - ) - SentryAppInstallationNotifier( - sentry_app_installation=sentry_app_installation, - user=request.user, - action="deleted", - ).run() - # if the error is from a request exception, log the error and continue - except RequestException as exc: - sentry_sdk.capture_exception(exc) + db = router.db_for_write(SentryAppInstallation) + + with transaction.atomic(using=db): sentry_app_installation.update(status=SentryAppInstallationStatus.PENDING_DELETION) ScheduledDeletion.schedule(sentry_app_installation, days=0, actor=request.user) create_audit_entry( @@ -67,6 +57,21 @@ def delete(self, request: Request, installation) -> Response: event=audit_log.get_event_id("SENTRY_APP_UNINSTALL"), data={"sentry_app": sentry_app_installation.sentry_app.name}, ) + + def notify_on_commit() -> None: + try: + assert request.user.is_authenticated, ( + "User must be authenticated to delete installation" + ) + SentryAppInstallationNotifier( + sentry_app_installation=sentry_app_installation, + user=request.user, + action="deleted", + ).run() + except RequestException as exc: + sentry_sdk.capture_exception(exc) + + transaction.on_commit(notify_on_commit, using=db) if request.user.is_authenticated: analytics.record( SentryAppUninstalledEvent( diff --git a/src/sentry/sentry_apps/metrics.py b/src/sentry/sentry_apps/metrics.py index dfd9d90fc279a7..aad977fa71eb26 100644 --- a/src/sentry/sentry_apps/metrics.py +++ b/src/sentry/sentry_apps/metrics.py @@ -70,6 +70,7 @@ class SentryAppWebhookHaltReason(StrEnum): MISSING_INSTALLATION = "missing_installation" RESTRICTED_IP = "restricted_ip" CONNECTION_RESET = "connection_reset" + HARD_TIMEOUT = "hard_timeout" class SentryAppExternalRequestFailureReason(StrEnum): diff --git a/src/sentry/utils/sentry_apps/webhooks.py b/src/sentry/utils/sentry_apps/webhooks.py index 97399851e0306a..f50e7e923f474a 100644 --- a/src/sentry/utils/sentry_apps/webhooks.py +++ b/src/sentry/utils/sentry_apps/webhooks.py @@ -2,6 +2,7 @@ import logging from collections.abc import Callable, Mapping +from types import FrameType from typing import TYPE_CHECKING, Any, Concatenate, ParamSpec, TypeVar import sentry_sdk @@ -9,9 +10,10 @@ from requests.exceptions import ChunkedEncodingError, ConnectionError, Timeout from rest_framework import status -from sentry import options +from sentry import features, options from sentry.exceptions import RestrictedIPAddress from sentry.http import safe_urlopen +from sentry.organizations.services.organization.service import organization_service from sentry.sentry_apps.metrics import ( SentryAppEventType, SentryAppWebhookFailureReason, @@ -20,6 +22,7 @@ from sentry.sentry_apps.models.sentry_app import SentryApp, track_response_code from sentry.sentry_apps.utils.errors import SentryAppSentryError from sentry.shared_integrations.exceptions import ApiHostError, ApiTimeoutError, ClientError +from sentry.taskworker.workerchild import timeout_alarm from sentry.utils.sentry_apps import SentryAppWebhookRequestsBuffer if TYPE_CHECKING: @@ -36,6 +39,23 @@ T = TypeVar("T", bound=Mapping[str, Any]) +class WebhookTimeoutError(Exception): + """This error represents a user set hard timeout for when a + webhook request should've completed within X seconds + """ + + pass + + +def _handle_webhook_timeout(signum: int, frame: FrameType | None) -> None: + """Handler for when a webhook request exceeds the hard timeout deadline. + - This is a workaround for safe_create_connection sockets hanging when the given url + cannot be reached or resolved. + - TODO(christinarlong): Add sentry app disabling logic here + """ + raise WebhookTimeoutError("Webhook request exceeded hard timeout deadline") + + def ignore_unpublished_app_errors( func: Callable[Concatenate[SentryApp | RpcSentryApp, P], R], ) -> Callable[Concatenate[SentryApp | RpcSentryApp, P], R | None]: @@ -99,12 +119,34 @@ def send_and_save_webhook_request( assert url is not None try: - response = safe_urlopen( - url=url, - data=app_platform_event.body, - headers=app_platform_event.headers, - timeout=options.get("sentry-apps.webhook.timeout.sec"), + organization_context = organization_service.get_organization_by_id( + id=app_platform_event.install.organization_id, + ) + if organization_context is not None and features.has( + "organizations:sentry-app-webhook-hard-timeout", + organization_context.organization, + ): + timeout_seconds = int(options.get("sentry-apps.webhook.hard-timeout.sec")) + with timeout_alarm(timeout_seconds, _handle_webhook_timeout): + response = safe_urlopen( + url=url, + data=app_platform_event.body, + headers=app_platform_event.headers, + timeout=options.get("sentry-apps.webhook.timeout.sec"), + ) + else: + response = safe_urlopen( + url=url, + data=app_platform_event.body, + headers=app_platform_event.headers, + timeout=options.get("sentry-apps.webhook.timeout.sec"), + ) + + except WebhookTimeoutError: + lifecycle.record_halt( + halt_reason=f"send_and_save_webhook_request.{SentryAppWebhookHaltReason.HARD_TIMEOUT}" ) + raise except (Timeout, ConnectionError) as e: error_type = e.__class__.__name__.lower() lifecycle.add_extras( diff --git a/tests/sentry/utils/sentry_apps/test_webhook_timeout.py b/tests/sentry/utils/sentry_apps/test_webhook_timeout.py new file mode 100644 index 00000000000000..145c1a7145f08d --- /dev/null +++ b/tests/sentry/utils/sentry_apps/test_webhook_timeout.py @@ -0,0 +1,217 @@ +import signal +import time +from unittest.mock import Mock, patch + +import pytest +from requests import Response + +from sentry.sentry_apps.api.serializers.app_platform_event import AppPlatformEvent +from sentry.sentry_apps.metrics import SentryAppWebhookHaltReason +from sentry.sentry_apps.utils.webhooks import IssueActionType, SentryAppResourceType +from sentry.testutils.asserts import assert_halt_metric +from sentry.testutils.cases import TestCase +from sentry.testutils.helpers.features import with_feature +from sentry.testutils.helpers.options import override_options +from sentry.testutils.silo import region_silo_test +from sentry.utils.sentry_apps.webhooks import WebhookTimeoutError, send_and_save_webhook_request + + +@region_silo_test +class WebhookTimeoutTest(TestCase): + def setUp(self): + self.organization = self.create_organization() + self.sentry_app = self.create_sentry_app( + name="TestApp", + organization=self.organization, + webhook_url="https://example.com/webhook", + published=True, + ) + self.install = self.create_sentry_app_installation( + organization=self.organization, slug=self.sentry_app.slug + ) + + @with_feature("organizations:sentry-app-webhook-hard-timeout") + @override_options( + { + "sentry-apps.webhook.hard-timeout.sec": 5.0, + "sentry-apps.webhook.timeout.sec": 1.0, + "sentry-apps.webhook.restricted-webhook-sending": [], + "sentry-apps.webhook-logging.enabled": {"installation_uuid": [], "sentry_app_slug": []}, + } + ) + @patch("sentry.utils.sentry_apps.webhooks.safe_urlopen") + def test_webhook_completes_before_timeout(self, mock_safe_urlopen): + # Mock successful response + mock_response = Mock(spec=Response) + mock_response.status_code = 200 + mock_response.headers = {} + mock_safe_urlopen.return_value = mock_response + + app_platform_event = AppPlatformEvent( + resource=SentryAppResourceType.ISSUE, + action=IssueActionType.CREATED, + install=self.install, + data={"test": "data"}, + ) + + # Should complete without raising WebhookTimeoutError + response = send_and_save_webhook_request(self.sentry_app, app_platform_event) + + assert response == mock_response + mock_safe_urlopen.assert_called_once() + + @with_feature("organizations:sentry-app-webhook-hard-timeout") + @override_options( + { + "sentry-apps.webhook.hard-timeout.sec": 1.0, + "sentry-apps.webhook.timeout.sec": 10.0, + "sentry-apps.webhook.restricted-webhook-sending": [], + "sentry-apps.webhook-logging.enabled": {"installation_uuid": [], "sentry_app_slug": []}, + } + ) + @patch("sentry.utils.sentry_apps.webhooks.safe_urlopen") + def test_webhook_interrupted_by_hard_timeout(self, mock_safe_urlopen): + # Make safe_urlopen sleep longer than the timeout + def slow_urlopen(*args, **kwargs): + time.sleep(6.0) # Sleep longer than 1 second timeout + mock_response = Mock(spec=Response) + mock_response.status_code = 200 + return mock_response + + mock_safe_urlopen.side_effect = slow_urlopen + + # Create event + app_platform_event = AppPlatformEvent( + resource=SentryAppResourceType.ISSUE, + action=IssueActionType.CREATED, + install=self.install, + data={"test": "data"}, + ) + + # Should raise WebhookTimeoutError + with pytest.raises(WebhookTimeoutError, match="Webhook request exceeded hard timeout"): + send_and_save_webhook_request(self.sentry_app, app_platform_event) + + @with_feature("organizations:sentry-app-webhook-hard-timeout") + @override_options( + { + "sentry-apps.webhook.hard-timeout.sec": 1.0, + "sentry-apps.webhook.timeout.sec": 10.0, + "sentry-apps.webhook.restricted-webhook-sending": [], + } + ) + @patch("sentry.utils.sentry_apps.webhooks.safe_urlopen") + def test_timeout_exception_propagates(self, mock_safe_urlopen): + # Make safe_urlopen sleep + def slow_urlopen(*args, **kwargs): + time.sleep(2.0) + return Mock(spec=Response) + + mock_safe_urlopen.side_effect = slow_urlopen + + app_platform_event = AppPlatformEvent( + resource=SentryAppResourceType.ISSUE, + action=IssueActionType.CREATED, + install=self.install, + data={"test": "data"}, + ) + + with pytest.raises(WebhookTimeoutError): + send_and_save_webhook_request(self.sentry_app, app_platform_event) + + @with_feature("organizations:sentry-app-webhook-hard-timeout") + @override_options( + { + "sentry-apps.webhook.hard-timeout.sec": 1.0, + "sentry-apps.webhook.timeout.sec": 10.0, + "sentry-apps.webhook.restricted-webhook-sending": [], + } + ) + @patch("sentry.utils.sentry_apps.webhooks.safe_urlopen") + @patch("sentry.integrations.utils.metrics.EventLifecycle.record_event") + def test_lifecycle_record_halt_called_on_timeout(self, mock_record, mock_safe_urlopen): + # Make safe_urlopen sleep + def slow_urlopen(*args, **kwargs): + time.sleep(2.0) + return Mock(spec=Response) + + mock_safe_urlopen.side_effect = slow_urlopen + + app_platform_event = AppPlatformEvent( + resource=SentryAppResourceType.ISSUE, + action=IssueActionType.CREATED, + install=self.install, + data={"test": "data"}, + ) + + with pytest.raises(WebhookTimeoutError): + send_and_save_webhook_request(self.sentry_app, app_platform_event) + + assert_halt_metric( + mock_record=mock_record, + error_msg=f"send_and_save_webhook_request.{SentryAppWebhookHaltReason.HARD_TIMEOUT}", + ) + + @with_feature("organizations:sentry-app-webhook-hard-timeout") + @override_options( + { + "sentry-apps.webhook.hard-timeout.sec": 5.0, + "sentry-apps.webhook.timeout.sec": 10.0, + "sentry-apps.webhook.restricted-webhook-sending": [], + "sentry-apps.webhook-logging.enabled": {"installation_uuid": [], "sentry_app_slug": []}, + } + ) + @patch("sentry.utils.sentry_apps.webhooks.safe_urlopen") + def test_timeout_alarm_restores_signal_handler(self, mock_safe_urlopen): + # Get original handler + original_handler = signal.signal(signal.SIGALRM, signal.SIG_DFL) + signal.signal(signal.SIGALRM, original_handler) + + # Mock quick response + mock_response = Mock(spec=Response) + mock_response.status_code = 200 + mock_response.headers = {} + mock_safe_urlopen.return_value = mock_response + + app_platform_event = AppPlatformEvent( + resource=SentryAppResourceType.ISSUE, + action=IssueActionType.CREATED, + install=self.install, + data={"test": "data"}, + ) + + send_and_save_webhook_request(self.sentry_app, app_platform_event) + + # Verify signal handler was restored + current_handler = signal.signal(signal.SIGALRM, signal.SIG_DFL) + signal.signal(signal.SIGALRM, current_handler) + assert current_handler == original_handler + + @with_feature("organizations:sentry-app-webhook-hard-timeout") + @override_options( + { + "sentry-apps.webhook.hard-timeout.sec": 5.0, + "sentry-apps.webhook.timeout.sec": 1.0, + "sentry-apps.webhook.restricted-webhook-sending": [], + "sentry-apps.webhook-logging.enabled": {"installation_uuid": [], "sentry_app_slug": []}, + } + ) + @patch("sentry.utils.sentry_apps.webhooks.safe_urlopen") + def test_alarm_cancelled_after_successful_webhook(self, mock_safe_urlopen): + mock_response = Mock(spec=Response) + mock_response.status_code = 200 + mock_response.headers = {} + mock_safe_urlopen.return_value = mock_response + + app_platform_event = AppPlatformEvent( + resource=SentryAppResourceType.ISSUE, + action=IssueActionType.CREATED, + install=self.install, + data={"test": "data"}, + ) + + send_and_save_webhook_request(self.sentry_app, app_platform_event) + + # Verify no alarm is pending + remaining = signal.alarm(0) + assert remaining == 0 # No alarm was pending