Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
"""Add DISCORD_CONNECTOR to SearchSourceConnectorType and DocumentType enums

Revision ID: 9
Revises: 8
"""

from typing import Sequence, Union

from alembic import op
import sqlalchemy as sa


# Revision identifiers, used by Alembic.
# This revision ("9") applies on top of revision "8".
revision: str = "9"
down_revision: Union[str, None] = "8"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None

# Names of the two database ENUM types touched by this migration and the
# label added to each; both enums receive the same new label.
CONNECTOR_ENUM = "searchsourceconnectortype"
CONNECTOR_NEW_VALUE = "DISCORD_CONNECTOR"
DOCUMENT_ENUM = "documenttype"
DOCUMENT_NEW_VALUE = "DISCORD_CONNECTOR"


def upgrade() -> None:
    """Upgrade schema - add DISCORD_CONNECTOR to connector and document enum.

    Each statement uses ``IF NOT EXISTS`` so that it is a no-op when the
    label is already present. This keeps the migration safe to re-run, for
    example after a partially applied upgrade or against a database where
    the value was added by hand.
    """
    # Add DISCORD_CONNECTOR to searchsourceconnectortype
    op.execute(
        f"ALTER TYPE {CONNECTOR_ENUM} "
        f"ADD VALUE IF NOT EXISTS '{CONNECTOR_NEW_VALUE}'"
    )
    # Add DISCORD_CONNECTOR to documenttype
    op.execute(
        f"ALTER TYPE {DOCUMENT_ENUM} "
        f"ADD VALUE IF NOT EXISTS '{DOCUMENT_NEW_VALUE}'"
    )


def _rebuild_enum_without_value(
    enum_name: str,
    kept_values: Sequence[str],
    table_name: str,
    column_name: str,
    removed_value: str,
) -> None:
    """Recreate ``enum_name`` with only ``kept_values`` and repoint one column.

    Args:
        enum_name: Name of the ENUM type to rebuild.
        kept_values: Labels the rebuilt type should contain, in order.
        table_name: Table whose column uses the ENUM type.
        column_name: Column in ``table_name`` typed as ``enum_name``.
        removed_value: Label being dropped; rows holding it are deleted.
    """
    old_enum_name = f"{enum_name}_old"
    kept_values_sql = ", ".join(f"'{v}'" for v in kept_values)

    # 1. Delete rows that still hold the label being removed. Without this
    #    the cast in step 4 fails, because the rebuilt enum has no such label.
    op.execute(
        f"DELETE FROM {table_name} "
        f"WHERE {column_name}::text = '{removed_value}'"
    )

    # 2. Rename the current enum type out of the way
    op.execute(f"ALTER TYPE {enum_name} RENAME TO {old_enum_name}")

    # 3. Create the replacement enum type with the old values only
    op.execute(f"CREATE TYPE {enum_name} AS ENUM({kept_values_sql})")

    # 4. Switch the column to the replacement type (casting through text)
    op.execute(
        f"ALTER TABLE {table_name} "
        f"ALTER COLUMN {column_name} "
        f"TYPE {enum_name} "
        f"USING {column_name}::text::{enum_name}"
    )

    # 5. Drop the renamed original enum type
    op.execute(f"DROP TYPE {old_enum_name}")


def downgrade() -> None:
    """Downgrade schema - remove DISCORD_CONNECTOR from connector and document enum.

    This is destructive: every row in the connector and document tables that
    still carries DISCORD_CONNECTOR is deleted, because the rebuilt enum
    types no longer contain that label.
    """
    old_connector_values = (
        "SERPER_API",
        "TAVILY_API",
        "LINKUP_API",
        "SLACK_CONNECTOR",
        "NOTION_CONNECTOR",
        "GITHUB_CONNECTOR",
        "LINEAR_CONNECTOR",
    )
    old_document_values = (
        "EXTENSION",
        "CRAWLED_URL",
        "FILE",
        "SLACK_CONNECTOR",
        "NOTION_CONNECTOR",
        "YOUTUBE_VIDEO",
        "GITHUB_CONNECTOR",
        "LINEAR_CONNECTOR",
    )

    # Table and column names (adjust if different)
    connector_table_name = "search_source_connectors"
    connector_column_name = "connector_type"
    document_table_name = "documents"
    document_column_name = "document_type"

    # Connector enum downgrade (now also removes DISCORD_CONNECTOR connector
    # rows, mirroring what was already done for documents)
    _rebuild_enum_without_value(
        enum_name=CONNECTOR_ENUM,
        kept_values=old_connector_values,
        table_name=connector_table_name,
        column_name=connector_column_name,
        removed_value=CONNECTOR_NEW_VALUE,
    )

    # Document enum downgrade
    _rebuild_enum_without_value(
        enum_name=DOCUMENT_ENUM,
        kept_values=old_document_values,
        table_name=document_table_name,
        column_name=document_column_name,
        removed_value=DOCUMENT_NEW_VALUE,
    )

# ### end Alembic commands ###
17 changes: 17 additions & 0 deletions surfsense_backend/app/agents/researcher/nodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -400,6 +400,23 @@ async def fetch_relevant_documents(
if streaming_service and writer:
streaming_service.only_update_terminal(f"🔗 Found {len(linkup_chunks)} Linkup results related to your query")
writer({"yeild_value": streaming_service._format_annotations()})

elif connector == "DISCORD_CONNECTOR":
source_object, discord_chunks = await connector_service.search_discord(
user_query=reformulated_query,
user_id=user_id,
search_space_id=search_space_id,
top_k=top_k,
search_mode=search_mode
)
# Add to sources and raw documents
if source_object:
all_sources.append(source_object)
all_raw_documents.extend(discord_chunks)
# Stream found document count
if streaming_service and writer:
streaming_service.only_update_terminal(f"🗨️ Found {len(discord_chunks)} Discord messages related to your query")
writer({"yeild_value": streaming_service._format_annotations()})


except Exception as e:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ def get_citation_system_prompt():
- YOUTUBE_VIDEO: "YouTube video transcripts and metadata" (personally saved videos)
- GITHUB_CONNECTOR: "GitHub repository content and issues" (personal repositories and interactions)
- LINEAR_CONNECTOR: "Linear project issues and discussions" (personal project management)
- DISCORD_CONNECTOR: "Discord server messages and channels" (personal community interactions)
- TAVILY_API: "Tavily search API results" (personalized search results)
- LINKUP_API: "Linkup search API results" (personalized search results)
</knowledge_sources>
Expand Down
Loading