Skip to content
2 changes: 2 additions & 0 deletions src/sentry/features/temporary.py
Original file line number Diff line number Diff line change
Expand Up @@ -593,6 +593,8 @@ def register_temporary_features(manager: FeatureManager) -> None:
manager.add("organizations:sentry-app-manual-token-refresh", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=False)
# Enables Conduit demo endpoint and UI
manager.add("organizations:conduit-demo", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=True)
# Enable hard timeout alarm for webhooks
manager.add("organizations:sentry-app-webhook-hard-timeout", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=False)

# Enables organization access to the new notification platform
manager.add("organizations:notification-platform.internal-testing", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=True)
Expand Down
7 changes: 7 additions & 0 deletions src/sentry/options/defaults.py
Original file line number Diff line number Diff line change
Expand Up @@ -2553,6 +2553,13 @@
flags=FLAG_AUTOMATOR_MODIFIABLE,
)

# Hard timeout for webhook requests to prevent indefinite hangs
register(
"sentry-apps.webhook.hard-timeout.sec",
default=5.0,
flags=FLAG_AUTOMATOR_MODIFIABLE,
)

# Enables statistical detectors for a project
register(
"statistical_detectors.enable",
Expand Down
31 changes: 18 additions & 13 deletions src/sentry/sentry_apps/api/endpoints/installation_details.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,19 +45,9 @@ def get(self, request: Request, installation) -> Response:

def delete(self, request: Request, installation) -> Response:
sentry_app_installation = SentryAppInstallation.objects.get(id=installation.id)
with transaction.atomic(using=router.db_for_write(SentryAppInstallation)):
try:
assert request.user.is_authenticated, (
"User must be authenticated to delete installation"
)
SentryAppInstallationNotifier(
sentry_app_installation=sentry_app_installation,
user=request.user,
action="deleted",
).run()
# if the error is from a request exception, log the error and continue
except RequestException as exc:
sentry_sdk.capture_exception(exc)
db = router.db_for_write(SentryAppInstallation)

with transaction.atomic(using=db):
sentry_app_installation.update(status=SentryAppInstallationStatus.PENDING_DELETION)
ScheduledDeletion.schedule(sentry_app_installation, days=0, actor=request.user)
create_audit_entry(
Expand All @@ -67,6 +57,21 @@ def delete(self, request: Request, installation) -> Response:
event=audit_log.get_event_id("SENTRY_APP_UNINSTALL"),
data={"sentry_app": sentry_app_installation.sentry_app.name},
)

def notify_on_commit() -> None:
try:
assert request.user.is_authenticated, (
"User must be authenticated to delete installation"
)
SentryAppInstallationNotifier(
sentry_app_installation=sentry_app_installation,
user=request.user,
action="deleted",
).run()
except RequestException as exc:
sentry_sdk.capture_exception(exc)

transaction.on_commit(notify_on_commit, using=db)
if request.user.is_authenticated:
analytics.record(
SentryAppUninstalledEvent(
Expand Down
1 change: 1 addition & 0 deletions src/sentry/sentry_apps/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ class SentryAppWebhookHaltReason(StrEnum):
MISSING_INSTALLATION = "missing_installation"
RESTRICTED_IP = "restricted_ip"
CONNECTION_RESET = "connection_reset"
HARD_TIMEOUT = "hard_timeout"


class SentryAppExternalRequestFailureReason(StrEnum):
Expand Down
54 changes: 48 additions & 6 deletions src/sentry/utils/sentry_apps/webhooks.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,18 @@

import logging
from collections.abc import Callable, Mapping
from types import FrameType
from typing import TYPE_CHECKING, Any, Concatenate, ParamSpec, TypeVar

import sentry_sdk
from requests import RequestException, Response
from requests.exceptions import ChunkedEncodingError, ConnectionError, Timeout
from rest_framework import status

from sentry import options
from sentry import features, options
from sentry.exceptions import RestrictedIPAddress
from sentry.http import safe_urlopen
from sentry.organizations.services.organization.service import organization_service
from sentry.sentry_apps.metrics import (
SentryAppEventType,
SentryAppWebhookFailureReason,
Expand All @@ -20,6 +22,7 @@
from sentry.sentry_apps.models.sentry_app import SentryApp, track_response_code
from sentry.sentry_apps.utils.errors import SentryAppSentryError
from sentry.shared_integrations.exceptions import ApiHostError, ApiTimeoutError, ClientError
from sentry.taskworker.workerchild import timeout_alarm
from sentry.utils.sentry_apps import SentryAppWebhookRequestsBuffer

if TYPE_CHECKING:
Expand All @@ -36,6 +39,23 @@
T = TypeVar("T", bound=Mapping[str, Any])


class WebhookTimeoutError(Exception):
"""This error represents a user set hard timeout for when a
webhook request should've completed within X seconds
"""

pass


def _handle_webhook_timeout(signum: int, frame: FrameType | None) -> None:
"""Handler for when a webhook request exceeds the hard timeout deadline.
- This is a workaround for safe_create_connection sockets hanging when the given url
cannot be reached or resolved.
- TODO(christinarlong): Add sentry app disabling logic here
"""
raise WebhookTimeoutError("Webhook request exceeded hard timeout deadline")


def ignore_unpublished_app_errors(
func: Callable[Concatenate[SentryApp | RpcSentryApp, P], R],
) -> Callable[Concatenate[SentryApp | RpcSentryApp, P], R | None]:
Expand Down Expand Up @@ -99,12 +119,34 @@ def send_and_save_webhook_request(

assert url is not None
try:
response = safe_urlopen(
url=url,
data=app_platform_event.body,
headers=app_platform_event.headers,
timeout=options.get("sentry-apps.webhook.timeout.sec"),
organization_context = organization_service.get_organization_by_id(
id=app_platform_event.install.organization_id,
)
if organization_context is not None and features.has(
"organizations:sentry-app-webhook-hard-timeout",
organization_context.organization,
):
timeout_seconds = int(options.get("sentry-apps.webhook.hard-timeout.sec"))
with timeout_alarm(timeout_seconds, _handle_webhook_timeout):
response = safe_urlopen(
url=url,
data=app_platform_event.body,
headers=app_platform_event.headers,
timeout=options.get("sentry-apps.webhook.timeout.sec"),
)
Comment on lines +130 to +136
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What's the purpose of discerning between this and the task-level timeout we already enforce? Is this covering a different code path?

else:
response = safe_urlopen(
url=url,
data=app_platform_event.body,
headers=app_platform_event.headers,
timeout=options.get("sentry-apps.webhook.timeout.sec"),
)

except WebhookTimeoutError:
lifecycle.record_halt(
halt_reason=f"send_and_save_webhook_request.{SentryAppWebhookHaltReason.HARD_TIMEOUT}"
)
raise
except (Timeout, ConnectionError) as e:
error_type = e.__class__.__name__.lower()
lifecycle.add_extras(
Expand Down
217 changes: 217 additions & 0 deletions tests/sentry/utils/sentry_apps/test_webhook_timeout.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,217 @@
import signal
import time
from unittest.mock import Mock, patch

import pytest
from requests import Response

from sentry.sentry_apps.api.serializers.app_platform_event import AppPlatformEvent
from sentry.sentry_apps.metrics import SentryAppWebhookHaltReason
from sentry.sentry_apps.utils.webhooks import IssueActionType, SentryAppResourceType
from sentry.testutils.asserts import assert_halt_metric
from sentry.testutils.cases import TestCase
from sentry.testutils.helpers.features import with_feature
from sentry.testutils.helpers.options import override_options
from sentry.testutils.silo import region_silo_test
from sentry.utils.sentry_apps.webhooks import WebhookTimeoutError, send_and_save_webhook_request


@region_silo_test
class WebhookTimeoutTest(TestCase):
def setUp(self):
self.organization = self.create_organization()
self.sentry_app = self.create_sentry_app(
name="TestApp",
organization=self.organization,
webhook_url="https://example.com/webhook",
published=True,
)
self.install = self.create_sentry_app_installation(
organization=self.organization, slug=self.sentry_app.slug
)

@with_feature("organizations:sentry-app-webhook-hard-timeout")
@override_options(
{
"sentry-apps.webhook.hard-timeout.sec": 5.0,
"sentry-apps.webhook.timeout.sec": 1.0,
"sentry-apps.webhook.restricted-webhook-sending": [],
"sentry-apps.webhook-logging.enabled": {"installation_uuid": [], "sentry_app_slug": []},
}
)
@patch("sentry.utils.sentry_apps.webhooks.safe_urlopen")
def test_webhook_completes_before_timeout(self, mock_safe_urlopen):
# Mock successful response
mock_response = Mock(spec=Response)
mock_response.status_code = 200
mock_response.headers = {}
mock_safe_urlopen.return_value = mock_response

app_platform_event = AppPlatformEvent(
resource=SentryAppResourceType.ISSUE,
action=IssueActionType.CREATED,
install=self.install,
data={"test": "data"},
)

# Should complete without raising WebhookTimeoutError
response = send_and_save_webhook_request(self.sentry_app, app_platform_event)

assert response == mock_response
mock_safe_urlopen.assert_called_once()

@with_feature("organizations:sentry-app-webhook-hard-timeout")
@override_options(
{
"sentry-apps.webhook.hard-timeout.sec": 1.0,
"sentry-apps.webhook.timeout.sec": 10.0,
"sentry-apps.webhook.restricted-webhook-sending": [],
"sentry-apps.webhook-logging.enabled": {"installation_uuid": [], "sentry_app_slug": []},
}
)
@patch("sentry.utils.sentry_apps.webhooks.safe_urlopen")
def test_webhook_interrupted_by_hard_timeout(self, mock_safe_urlopen):
# Make safe_urlopen sleep longer than the timeout
def slow_urlopen(*args, **kwargs):
time.sleep(6.0) # Sleep longer than 1 second timeout
mock_response = Mock(spec=Response)
mock_response.status_code = 200
return mock_response

mock_safe_urlopen.side_effect = slow_urlopen

# Create event
app_platform_event = AppPlatformEvent(
resource=SentryAppResourceType.ISSUE,
action=IssueActionType.CREATED,
install=self.install,
data={"test": "data"},
)

# Should raise WebhookTimeoutError
with pytest.raises(WebhookTimeoutError, match="Webhook request exceeded hard timeout"):
send_and_save_webhook_request(self.sentry_app, app_platform_event)

@with_feature("organizations:sentry-app-webhook-hard-timeout")
@override_options(
{
"sentry-apps.webhook.hard-timeout.sec": 1.0,
"sentry-apps.webhook.timeout.sec": 10.0,
"sentry-apps.webhook.restricted-webhook-sending": [],
}
)
@patch("sentry.utils.sentry_apps.webhooks.safe_urlopen")
def test_timeout_exception_propagates(self, mock_safe_urlopen):
# Make safe_urlopen sleep
def slow_urlopen(*args, **kwargs):
time.sleep(2.0)
return Mock(spec=Response)

mock_safe_urlopen.side_effect = slow_urlopen

app_platform_event = AppPlatformEvent(
resource=SentryAppResourceType.ISSUE,
action=IssueActionType.CREATED,
install=self.install,
data={"test": "data"},
)

with pytest.raises(WebhookTimeoutError):
send_and_save_webhook_request(self.sentry_app, app_platform_event)

@with_feature("organizations:sentry-app-webhook-hard-timeout")
@override_options(
{
"sentry-apps.webhook.hard-timeout.sec": 1.0,
"sentry-apps.webhook.timeout.sec": 10.0,
"sentry-apps.webhook.restricted-webhook-sending": [],
}
)
@patch("sentry.utils.sentry_apps.webhooks.safe_urlopen")
@patch("sentry.integrations.utils.metrics.EventLifecycle.record_event")
def test_lifecycle_record_halt_called_on_timeout(self, mock_record, mock_safe_urlopen):
# Make safe_urlopen sleep
def slow_urlopen(*args, **kwargs):
time.sleep(2.0)
return Mock(spec=Response)

mock_safe_urlopen.side_effect = slow_urlopen

app_platform_event = AppPlatformEvent(
resource=SentryAppResourceType.ISSUE,
action=IssueActionType.CREATED,
install=self.install,
data={"test": "data"},
)

with pytest.raises(WebhookTimeoutError):
send_and_save_webhook_request(self.sentry_app, app_platform_event)

assert_halt_metric(
mock_record=mock_record,
error_msg=f"send_and_save_webhook_request.{SentryAppWebhookHaltReason.HARD_TIMEOUT}",
)

@with_feature("organizations:sentry-app-webhook-hard-timeout")
@override_options(
{
"sentry-apps.webhook.hard-timeout.sec": 5.0,
"sentry-apps.webhook.timeout.sec": 10.0,
"sentry-apps.webhook.restricted-webhook-sending": [],
"sentry-apps.webhook-logging.enabled": {"installation_uuid": [], "sentry_app_slug": []},
}
)
@patch("sentry.utils.sentry_apps.webhooks.safe_urlopen")
def test_timeout_alarm_restores_signal_handler(self, mock_safe_urlopen):
# Get original handler
original_handler = signal.signal(signal.SIGALRM, signal.SIG_DFL)
signal.signal(signal.SIGALRM, original_handler)

# Mock quick response
mock_response = Mock(spec=Response)
mock_response.status_code = 200
mock_response.headers = {}
mock_safe_urlopen.return_value = mock_response

app_platform_event = AppPlatformEvent(
resource=SentryAppResourceType.ISSUE,
action=IssueActionType.CREATED,
install=self.install,
data={"test": "data"},
)

send_and_save_webhook_request(self.sentry_app, app_platform_event)

# Verify signal handler was restored
current_handler = signal.signal(signal.SIGALRM, signal.SIG_DFL)
signal.signal(signal.SIGALRM, current_handler)
assert current_handler == original_handler

@with_feature("organizations:sentry-app-webhook-hard-timeout")
@override_options(
{
"sentry-apps.webhook.hard-timeout.sec": 5.0,
"sentry-apps.webhook.timeout.sec": 1.0,
"sentry-apps.webhook.restricted-webhook-sending": [],
"sentry-apps.webhook-logging.enabled": {"installation_uuid": [], "sentry_app_slug": []},
}
)
@patch("sentry.utils.sentry_apps.webhooks.safe_urlopen")
def test_alarm_cancelled_after_successful_webhook(self, mock_safe_urlopen):
mock_response = Mock(spec=Response)
mock_response.status_code = 200
mock_response.headers = {}
mock_safe_urlopen.return_value = mock_response

app_platform_event = AppPlatformEvent(
resource=SentryAppResourceType.ISSUE,
action=IssueActionType.CREATED,
install=self.install,
data={"test": "data"},
)

send_and_save_webhook_request(self.sentry_app, app_platform_event)

# Verify no alarm is pending
remaining = signal.alarm(0)
assert remaining == 0 # No alarm was pending
Loading