Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 10 additions & 2 deletions src/sentry/notifications/platform/slack/renderers/seer.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,10 @@
SeerAutofixTrigger,
SeerAutofixUpdate,
)
from sentry.notifications.platform.types import NotificationData, NotificationRenderedTemplate
from sentry.notifications.platform.types import (
NotificationData,
NotificationRenderedTemplate,
)
from sentry.seer.autofix.utils import AutofixStoppingPoint

if TYPE_CHECKING:
Expand Down Expand Up @@ -179,7 +182,12 @@ def _render_autofix_update(cls, data: SeerAutofixUpdate) -> SlackRenderable:

@classmethod
def _render_link_button(
cls, *, organization_id: int, project_id: int, group_link: str, text: str = "View in Sentry"
cls,
*,
organization_id: int,
project_id: int,
group_link: str,
text: str = "View in Sentry",
) -> LinkButtonElement:
from sentry.integrations.slack.message_builder.routing import encode_action_id
from sentry.integrations.slack.message_builder.types import SlackAction
Expand Down
19 changes: 19 additions & 0 deletions src/sentry/seer/autofix/autofix_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,25 @@ class AutofixStep(StrEnum):
IMPACT_ASSESSMENT = "impact_assessment"
TRIAGE = "triage"

@staticmethod
def from_autofix_stopping_point(
    autofix_stopping_point: AutofixStoppingPoint,
) -> AutofixStep:
    """Translate an ``AutofixStoppingPoint`` into its ``AutofixStep``.

    ``ROOT_CAUSE``, ``SOLUTION`` and ``CODE_CHANGES`` map to the step of the
    same name. ``OPEN_PR`` has no step of its own and maps to
    ``CODE_CHANGES``.

    Raises:
        ValueError: If the stopping point matches none of the cases above.
    """
    if autofix_stopping_point == AutofixStoppingPoint.ROOT_CAUSE:
        return AutofixStep.ROOT_CAUSE

    if autofix_stopping_point == AutofixStoppingPoint.SOLUTION:
        return AutofixStep.SOLUTION

    # OPEN_PR depends on the last step being code changes; the PR itself
    # has to be looked for elsewhere in the explorer results.
    if autofix_stopping_point in (
        AutofixStoppingPoint.CODE_CHANGES,
        AutofixStoppingPoint.OPEN_PR,
    ):
        return AutofixStep.CODE_CHANGES

    raise ValueError(f"Unsupported AutofixStoppingPoint: {autofix_stopping_point}")


class StepConfig:
"""Configuration for an autofix step."""
Expand Down
208 changes: 203 additions & 5 deletions src/sentry/seer/entrypoints/operator.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,8 @@
from sentry.constants import DataCategory
from sentry.models.group import Group
from sentry.models.organization import Organization
from sentry.seer.autofix.autofix import trigger_autofix as _trigger_autofix
from sentry.seer.autofix.autofix import update_autofix
from sentry.seer.autofix.constants import AutofixReferrer
from sentry.seer.autofix.autofix import trigger_autofix, update_autofix
from sentry.seer.autofix.constants import AutofixReferrer, AutofixStatus
from sentry.seer.autofix.types import (
AutofixCreatePRPayload,
AutofixSelectRootCausePayload,
Expand All @@ -27,6 +26,7 @@
)
from sentry.seer.entrypoints.registry import entrypoint_registry
from sentry.seer.entrypoints.types import SeerEntrypoint, SeerEntrypointKey
from sentry.seer.explorer.client_models import SeerRunState
from sentry.seer.seer_setup import has_seer_access
from sentry.sentry_apps.metrics import SentryAppEventType
from sentry.tasks.base import instrumented_task
Expand Down Expand Up @@ -129,6 +129,144 @@ def trigger_autofix(
stopping_point: AutofixStoppingPoint,
instruction: str | None = None,
run_id: int | None = None,
) -> None:
if features.has("organizations:autofix-on-explorer", group.organization):
self.trigger_autofix_explorer(
group=group,
user=user,
stopping_point=stopping_point,
instruction=instruction,
run_id=run_id,
)
else:
self.trigger_autofix_legacy(
group=group,
user=user,
stopping_point=stopping_point,
instruction=instruction,
run_id=run_id,
)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

has_access blocks explorer orgs making new path unreachable

High Severity

The new trigger_autofix routing dispatches to trigger_autofix_explorer when organizations:autofix-on-explorer is enabled, but the existing has_access method returns False for the exact same flag (line 84). Since has_access gates whether autofix buttons appear in Slack messages and whether update caching is set up, the new explorer path is unreachable in production — no user can ever trigger it.

Additional Locations (1)

Fix in Cursor Fix in Web


def trigger_autofix_explorer(
self,
*,
group: Group,
user: User | RpcUser,
stopping_point: AutofixStoppingPoint,
instruction: str | None = None,
run_id: int | None = None,
) -> None:
Comment on lines +150 to +158
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bug: The instruction parameter in trigger_autofix_explorer is accepted but its value is never used, causing user instructions to be silently ignored in the explorer flow.
Severity: MEDIUM

Suggested Fix

To fix this, the instruction parameter should be removed from the SeerOperator.trigger_autofix_explorer method signature to avoid confusion, as the underlying functionality does not support it. Alternatively, if instruction support is intended, the underlying trigger_autofix_explorer function in autofix_agent.py must be updated to accept and utilize the instruction parameter.

Prompt for AI Agent
Review the code at the location below. A potential bug has been identified by an AI
agent.
Verify if this is a real issue. If it is, propose a fix; if not, explain why it's not
valid.

Location: src/sentry/seer/entrypoints/operator.py#L150-L158

Potential issue: The `trigger_autofix_explorer` method in the `SeerOperator` class
accepts an `instruction` parameter, implying that users can provide custom instructions
to guide the autofix process. However, this parameter is never used within the method's
body. The value is not passed to the underlying `trigger_autofix_explorer` function,
which does not support this parameter. This leads to user-provided instructions being
silently ignored when the explorer-based autofix flow is triggered, resulting in autofix
behavior that does not match user expectations.

from sentry.seer.autofix.autofix_agent import (
AutofixStep,
get_autofix_explorer_state,
trigger_autofix_explorer,
)

event_lifecyle = SeerOperatorEventLifecycleMetric(
interaction_type=SeerOperatorInteractionType.OPERATOR_TRIGGER_AUTOFIX,
entrypoint_key=self.entrypoint.key,
)

with event_lifecyle.capture() as lifecycle:
lifecycle.add_extras(
{
"group_id": str(group.id),
"user_id": str(user.id),
"stopping_point": str(stopping_point),
}
)

try:
existing_state = get_autofix_explorer_state(group.organization, group.id)
except Exception as e:
with SeerOperatorEventLifecycleMetric(
interaction_type=SeerOperatorInteractionType.ENTRYPOINT_ON_TRIGGER_AUTOFIX_ERROR,
entrypoint_key=self.entrypoint.key,
).capture():
self.entrypoint.on_trigger_autofix_error(
error="Encountered an error while talking to Seer"
)
lifecycle.record_failure(failure_reason=e)
return
if existing_state:
has_complete_stage = get_autofix_explorer_status(stopping_point, existing_state)
lifecycle.add_extras(
{
"existing_run_id": str(existing_state.run_id),
"existing_run_status": str(existing_state.status),
}
)

# For now, we don't support re-runs over slack -- it causes a confusing UX without
# reliably being able to edit messages.
if has_complete_stage is not None:
with SeerOperatorEventLifecycleMetric(
interaction_type=SeerOperatorInteractionType.ENTRYPOINT_ON_TRIGGER_AUTOFIX_ALREADY_EXISTS,
entrypoint_key=self.entrypoint.key,
).capture():
self.entrypoint.on_trigger_autofix_already_exists(
run_id=existing_state.run_id,
has_complete_stage=has_complete_stage,
)
return

if not run_id:
run_id = trigger_autofix_explorer(
group=group,
step=AutofixStep.ROOT_CAUSE,
run_id=None,
)
elif stopping_point == AutofixStoppingPoint.OPEN_PR:
pass # TODO: OPENING PRs is a little more complicated so putting it off for now
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OPEN_PR no-op falls through to false success signal

Medium Severity

When stopping_point is OPEN_PR and run_id is set, the elif branch does pass but doesn't return, so execution falls through to on_trigger_autofix_success and cache creation. This sends a misleading success message to Slack and creates a cache entry even though no autofix step was actually triggered. The legacy path properly handles OPEN_PR by building an AutofixCreatePRPayload. The pass here needs an early return or error notification to avoid false user-facing feedback.

Additional Locations (1)

Fix in Cursor Fix in Web

else:
# NOTE: Stopping point here is really just what
# step to run next. Not the same as the stopping_point
# argument supported by `trigger_autofix_explorer` which allows one
# to run multiple steps at once
run_id = trigger_autofix_explorer(
group=group,
step=AutofixStep.from_autofix_stopping_point(stopping_point),
run_id=run_id,
)

lifecycle.add_extra("run_id", str(run_id))

# Let the entrypoint signal to the external service that the run started
with SeerOperatorEventLifecycleMetric(
interaction_type=SeerOperatorInteractionType.ENTRYPOINT_ON_TRIGGER_AUTOFIX_SUCCESS,
entrypoint_key=self.entrypoint.key,
).capture():
self.entrypoint.on_trigger_autofix_success(run_id=run_id)

# Create a cache payload that will be picked up for subsequent updates
with SeerOperatorEventLifecycleMetric(
interaction_type=SeerOperatorInteractionType.ENTRYPOINT_CREATE_AUTOFIX_CACHE_PAYLOAD,
entrypoint_key=self.entrypoint.key,
).capture():
cache_payload = self.entrypoint.create_autofix_cache_payload()

if not cache_payload:
return
cache_result = SeerOperatorAutofixCache.populate_post_autofix_cache(
entrypoint_key=str(self.entrypoint.key),
cache_payload=cache_payload,
run_id=run_id,
)
lifecycle.add_extras(
{
"cache_key": cache_result["key"],
"cache_source": cache_result["source"],
}
)

def trigger_autofix_legacy(
self,
*,
group: Group,
user: User | RpcUser,
stopping_point: AutofixStoppingPoint,
instruction: str | None = None,
run_id: int | None = None,
) -> None:
event_lifecyle = SeerOperatorEventLifecycleMetric(
interaction_type=SeerOperatorInteractionType.OPERATOR_TRIGGER_AUTOFIX,
Expand Down Expand Up @@ -173,13 +311,20 @@ def trigger_autofix(
interaction_type=SeerOperatorInteractionType.ENTRYPOINT_ON_TRIGGER_AUTOFIX_ALREADY_EXISTS,
entrypoint_key=self.entrypoint.key,
).capture():
has_complete_stage = (
False
if stopping_point_step.get("key")
in {"root_cause_analysis_processing", "solution_processing"}
else stopping_point_step.get("status") == AutofixStatus.COMPLETED
)
self.entrypoint.on_trigger_autofix_already_exists(
state=existing_state, step_state=stopping_point_step
run_id=existing_state.run_id,
has_complete_stage=has_complete_stage,
)
return

if not run_id:
raw_response = _trigger_autofix(
raw_response = trigger_autofix(
group=group,
user=user,
referrer=AutofixReferrer.SLACK,
Expand Down Expand Up @@ -374,6 +519,59 @@ def get_stopping_point_status(
return step


def get_autofix_explorer_status(
stopping_point: AutofixStoppingPoint, autofix_state: SeerRunState
) -> bool | None:
from sentry.seer.autofix.autofix_agent import AutofixStep

expected_step = AutofixStep.from_autofix_stopping_point(stopping_point)

is_last = True
for block in reversed(autofix_state.blocks):
metadata = block.message.metadata
if metadata is None:
continue

step_str = metadata.get("step")
if step_str is None:
continue

try:
step = AutofixStep(step_str)
except ValueError:
continue

if step == expected_step:
# If the expected step is not the last step
# then we can assume it is already completed
# so return True to indicate that
if not is_last:
return True

# If the expected step is the last step, then
# we check the run state to see if it's processing
#
# Everything except the processing status
# is considered as some form of completed
completed = autofix_state.status != "processing"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bug: The function get_autofix_explorer_status incorrectly treats "error" and "awaiting_user_input" states as "completed", misleading users and breaking the autofix flow.
Severity: HIGH

Suggested Fix

The completion check should be more specific. Change the line completed = autofix_state.status != "processing" to completed = autofix_state.status == "completed" to ensure only successfully completed runs are marked as such. This will allow other states like "error" and "awaiting_user_input" to be handled correctly.

Prompt for AI Agent
Review the code at the location below. A potential bug has been identified by an AI
agent.
Verify if this is a real issue. If it is, propose a fix; if not, explain why it's not
valid.

Location: src/sentry/seer/entrypoints/operator.py#L556

Potential issue: The function `get_autofix_explorer_status` at line 556 uses the logic
`completed = autofix_state.status != "processing"` to determine if a step is complete.
This incorrectly classifies runs with `"error"` or `"awaiting_user_input"` statuses as
complete. As a result, when an autofix run fails or requires user interaction, the
system reports it as successful in Slack. This misleads the user about the outcome and
prevents them from retrying a failed run or providing input to a paused one, effectively
freezing the process from their perspective.


# OPEN_PR step gets special treatment to also
# check on the status of the pr creation
if stopping_point == AutofixStoppingPoint.OPEN_PR and completed:
return all(
pr_state.pr_creation_status != "creating"
for pr_state in autofix_state.repo_pr_states.values()
)
Comment on lines +560 to +564
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bug: The use of all() on an empty repo_pr_states dictionary incorrectly returns True, marking the OPEN_PR stage as complete when no PRs have been created.
Severity: MEDIUM

Suggested Fix

Modify the logic to ensure that repo_pr_states is not empty before evaluating the all() condition. The expression should only return True if the dictionary is non-empty and all its values meet the condition. For example: return bool(autofix_state.repo_pr_states) and all(...).

Prompt for AI Agent
Review the code at the location below. A potential bug has been identified by an AI
agent.
Verify if this is a real issue. If it is, propose a fix; if not, explain why it's not
valid.

Location: src/sentry/seer/entrypoints/operator.py#L560-L564

Potential issue: When checking the status of an `OPEN_PR` stopping point, the code
evaluates `all(pr_state.pr_creation_status != "creating" for pr_state in
autofix_state.repo_pr_states.values())`. If the `repo_pr_states` dictionary is empty,
which can happen if the PR creation step failed or has not run, Python's `all()`
function on the empty collection returns `True`. This incorrectly signals that the
`OPEN_PR` stage is complete. As a result, the system may prevent the user from
triggering or re-triggering PR creation, as it believes the step has already finished
successfully.


return completed

is_last = False

# no block matching the stopping point found, so return None
# to indicate the step has not run before
return None


def get_latest_cause_id(autofix_state: AutofixState | None) -> int:
"""
Gets the latest cause_id from a given autofix state.
Expand Down
22 changes: 11 additions & 11 deletions src/sentry/seer/entrypoints/slack/entrypoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,12 @@
from sentry.constants import ENABLE_SEER_ENHANCED_ALERTS_DEFAULT
from sentry.locks import locks
from sentry.models.organization import Organization
from sentry.notifications.platform.templates.seer import SeerAutofixError, SeerAutofixUpdate
from sentry.notifications.platform.templates.seer import (
SeerAutofixError,
SeerAutofixUpdate,
)
from sentry.notifications.utils.actions import BlockKitMessageAction
from sentry.seer.autofix.constants import AutofixStatus
from sentry.seer.autofix.utils import AutofixState, AutofixStoppingPoint
from sentry.seer.autofix.utils import AutofixStoppingPoint
from sentry.seer.entrypoints.cache import SeerOperatorAutofixCache
from sentry.seer.entrypoints.registry import entrypoint_registry
from sentry.seer.entrypoints.slack.messaging import (
Expand Down Expand Up @@ -157,15 +159,10 @@ def on_trigger_autofix_error(self, *, error: str) -> None:
def on_trigger_autofix_success(self, *, run_id: int) -> None:
    """React to an autofix run having been triggered successfully.

    Delegates to ``_update_existing_message`` for ``run_id``, flagging the
    stage as not complete and asking for the user to be included.
    """
    self._update_existing_message(
        run_id=run_id,
        include_user=True,
        has_complete_stage=False,
    )

def on_trigger_autofix_already_exists(self, *, state: AutofixState, step_state: dict) -> None:
def on_trigger_autofix_already_exists(self, *, run_id: int, has_complete_stage: bool) -> None:
# We don't include the user since we don't know that they started the original run.
has_complete_stage = (
False
if step_state.get("key") in {"root_cause_analysis_processing", "solution_processing"}
else step_state.get("status") == AutofixStatus.COMPLETED
)
self._update_existing_message(
run_id=state.run_id, has_complete_stage=has_complete_stage, include_user=False
run_id=run_id, has_complete_stage=has_complete_stage, include_user=False
)

def create_autofix_cache_payload(self) -> SlackEntrypointCachePayload:
Expand Down Expand Up @@ -229,7 +226,10 @@ def on_autofix_update(
for change in changes
]
data_kwargs.update(
{"current_point": AutofixStoppingPoint.CODE_CHANGES, "changes": changes_list}
{
"current_point": AutofixStoppingPoint.CODE_CHANGES,
"changes": changes_list,
}
)
case SentryAppEventType.SEER_PR_CREATED:
pull_requests = [
Expand Down
7 changes: 4 additions & 3 deletions src/sentry/seer/entrypoints/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
from typing import Any, Literal, Protocol, TypedDict

from sentry.models.organization import Organization
from sentry.seer.autofix.utils import AutofixState
from sentry.sentry_apps.metrics import SentryAppEventType


Expand Down Expand Up @@ -31,7 +30,7 @@ def has_access(organization: Organization) -> bool:
"""
...

def on_trigger_autofix_already_exists(self, *, state: AutofixState, step_state: dict) -> None:
def on_trigger_autofix_already_exists(self, *, run_id: int, has_complete_stage: bool) -> None:
"""
Called when an autofix run already exists for the group.
Also passes the existing run's ID and whether the stage matching the requested stopping point has already completed.
Expand Down Expand Up @@ -66,7 +65,9 @@ def create_autofix_cache_payload(self) -> CachePayloadT:

@staticmethod
def on_autofix_update(
event_type: SentryAppEventType, event_payload: dict[str, Any], cache_payload: CachePayloadT
event_type: SentryAppEventType,
event_payload: dict[str, Any],
cache_payload: CachePayloadT,
) -> None:
"""
Called when an autofix update is received (via Seer's webhooks).
Expand Down
Loading
Loading