Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Addressing the FR raised against the package to add the title field t… #17

Merged
merged 9 commits into from
Jan 22, 2025
6 changes: 5 additions & 1 deletion dbt_project.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,16 @@ vars:
hubspot_contact: "{{ source('rag_hubspot', 'contact') }}"
hubspot_deal: "{{ source('rag_hubspot', 'deal') }}"
hubspot_owner: "{{ source('rag_hubspot', 'owner') }}"

rag_using_zendesk: true
rag_using_jira: true
rag__using_hubspot: true
fivetran-jamie marked this conversation as resolved.
Show resolved Hide resolved

models:
unified_rag:
+schema: unified_rag
intermediate:
+materialized: ephemeral
+materialized: view # change back to ephemeral
fivetran-jamie marked this conversation as resolved.
Show resolved Hide resolved
unstructured:
+materialized: view
staging:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ engagement_emails as (
engagement_email.owner_id,
engagement_email.team_id,
engagement_email.body,
engagement_email.email_subject,
engagement_email.title,
engagement_email.email_to_email,
engagement_email.email_cc_email,
engagement_email.email_from_email as commenter_email,
Expand All @@ -66,6 +66,7 @@ engagement_notes as (
engagement_note.occurred_timestamp,
engagement_note.owner_id,
engagement_note.team_id,
engagement_note.title,
engagement_note.body,
owners.owner_name,
owners.owner_email
Expand All @@ -81,11 +82,11 @@ email_comment_details as (
engagement_deals.engagement_id as deal_comment_id,
engagement_deals.deal_id,
engagement_deals.source_relation,
deals.deal_name,
coalesce(deals.title, engagement_emails.title) as title,
{{ unified_rag.coalesce_cast(["engagement_emails.commenter_email", "'UNKNOWN'"], dbt.type_string()) }} as commenter_email,
{{ unified_rag.coalesce_cast(["engagement_emails.commenter_name", "'UNKNOWN'"], dbt.type_string()) }} as commenter_name,
engagement_emails.title as email_title,
engagement_emails.created_timestamp as comment_time,
engagement_emails.email_subject,
engagement_emails.body as comment_body
from deals
left join engagement_deals
Expand All @@ -102,9 +103,10 @@ note_comment_details as (
engagement_deals.engagement_id as deal_comment_id,
deals.deal_id,
deals.source_relation,
deals.deal_name,
coalesce(deals.title, engagement_notes.title) as title,
{{ unified_rag.coalesce_cast(["engagement_notes.owner_email", "'UNKNOWN'"], dbt.type_string()) }} as commenter_email,
{{ unified_rag.coalesce_cast(["engagement_notes.owner_name", "'UNKNOWN'"], dbt.type_string()) }} as commenter_name,
engagement_notes.title as engagement_note_title,
engagement_notes.created_timestamp as comment_time,
engagement_notes.body as comment_body
from deals
Expand All @@ -122,11 +124,12 @@ comment_markdowns as (
select
deal_comment_id,
deal_id,
email_title as title,
fivetran-jamie marked this conversation as resolved.
Show resolved Hide resolved
source_relation,
comment_time,
fivetran-poonamagate marked this conversation as resolved.
Show resolved Hide resolved
cast(
{{ dbt.concat([
"'Email subject:'", "email_subject", "'\\n'",
"'Email subject:'", "email_title", "'\\n'",
"'### message from '", "commenter_name", "' ('", "commenter_email", "')\\n'",
"'##### sent @ '", "comment_time", "'\\n'",
"comment_body"
Expand All @@ -139,6 +142,7 @@ comment_markdowns as (
select
deal_comment_id,
deal_id,
engagement_note_title as title,
fivetran-jamie marked this conversation as resolved.
Show resolved Hide resolved
source_relation,
comment_time,
cast(
Expand Down Expand Up @@ -166,6 +170,7 @@ truncated_comments as (
select
deal_comment_id,
deal_id,
title,
source_relation,
comment_time,
case when comment_tokens > {{ var('document_max_tokens', 5000) }} then left(comment_markdown, {{ var('document_max_tokens', 5000) }} * 4) -- approximate 4 characters per token
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ grouped_comment_documents as (

select
deal_id,
title,
source_relation,
comment_markdown,
comment_tokens,
Expand All @@ -24,6 +25,7 @@ grouped_comment_documents as (

select
deal_id,
title,
source_relation,
cast({{ dbt_utils.safe_divide('floor(cumulative_length - 1)', var('document_max_tokens', 5000)) }} as {{ dbt.type_int() }}) as chunk_index,
max(comment_time) as most_recent_chunk_update,
Expand All @@ -34,4 +36,4 @@ select
) }} as comments_group_markdown,
sum(comment_tokens) as chunk_tokens
from grouped_comment_documents
group by 1,2,3
group by 1,2,3,4
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ engagement_detail_prep as (

select
deals.deal_id,
deals.deal_name,
deals.title,
{{ unified_rag.coalesce_cast(["engagements.engagement_type", "'UNKNOWN'"], dbt.type_string()) }} as engagement_type,
{{ dbt.concat(["'https://app.hubspot.com/contacts'", "deals.portal_id", "'/record/0-3/'", "deals.deal_id"]) }} as url_reference,
deals.source_relation,
Expand Down Expand Up @@ -77,7 +77,7 @@ engagement_detail_prep as (
engagement_details as (
select
deal_id,
deal_name,
title,
url_reference,
created_on,
source_relation,
Expand All @@ -93,10 +93,11 @@ engagement_markdown as (

select
deal_id,
title,
source_relation,
url_reference,
{{ dbt.concat([
"'Deal Name : '", "deal_name", "'\\n\\n'",
"'Deal Name : '", "title", "'\\n\\n'",
"'Created By : '", "contact_name", "' ('", "created_by", "')\\n'",
"'Created On : '", "created_on", "'\\n'",
"'Company Name: '", "company_name", "'\\n'",
Expand Down
5 changes: 3 additions & 2 deletions models/intermediate/jira/int_rag_jira__issue_document.sql
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ issue_details as (

select
issues.issue_id,
issues.issue_name,
issues.title,
{% if var('jira_subdomain', default=None) %}
{{ dbt.concat(["'https://'", "jira_subdomain_value", "'.atlassian.net/browse/'", "issues.issue_key"]) }} as url_reference,
{% else %}
Expand Down Expand Up @@ -66,10 +66,11 @@ final as (

select
issue_id,
title,
source_relation,
url_reference,
{{ dbt.concat([
"'# issue : '", "issue_name", "'\\n\\n'",
"'# issue : '", "title", "'\\n\\n'",
"'Created By : '", "user_name", "' ('", "created_by", "')\\n'",
"'Created On : '", "created_on", "'\\n'",
"'Status : '", "status", "'\\n'",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ with tickets as (
tickets.ticket_id,
replace(replace(cast(tickets.url as {{ dbt.type_string() }}), '/api/v2/tickets/', '/agent/tickets/'), '.json', '') as url_reference,
tickets.source_relation,
tickets.subject as ticket_name,
tickets.title,
{{ unified_rag.coalesce_cast(["users.name", "'UNKNOWN'"], dbt.type_string()) }} as user_name,
{{ unified_rag.coalesce_cast(["users.email", "'UNKNOWN'"], dbt.type_string()) }} as created_by,
tickets.created_at as created_on,
Expand All @@ -29,10 +29,11 @@ with tickets as (
), final as (
select
ticket_id,
title,
source_relation,
url_reference,
{{ dbt.concat([
"'# Ticket : '", "ticket_name", "'\\n\\n'",
"'# Ticket : '", "title", "'\\n\\n'",
"'Created By : '", "user_name", "' ('", "created_by", "')\\n'",
"'Created On : '", "created_on", "'\\n'",
"'Status : '", "status", "'\\n'",
Expand Down
7 changes: 4 additions & 3 deletions models/rag__unified_document.sql
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,14 @@
{%- set queries = [] -%}

{% for platform in enabled_variables %}
{% if var(platform) == true -%}
{% if var(platform, true) == true -%}
{%- set platform_name = platform | replace('rag__using_', '') -%}
{%- set unique_key_fields = ['document_id', 'platform', 'chunk_index', 'source_relation'] -%}
{% set select_statement = (
"select \n" ~
" " ~ dbt_utils.generate_surrogate_key(unique_key_fields) ~ "as unique_id, \n" ~
" " ~ dbt_utils.generate_surrogate_key(unique_key_fields) ~ " as unique_id, \n" ~
" document_id, \n" ~
" title, \n" ~
" url_reference, \n" ~
" platform, \n" ~
" source_relation, \n" ~
Expand All @@ -44,4 +45,4 @@
{{ queries | join(' union all ') }}
{%- else -%}
{{ queries[0] }}
{%- endif -%}
{%- endif %}
6 changes: 4 additions & 2 deletions models/staging/hubspot_staging/stg_rag_hubspot.yml
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ models:
description: The ID of the deal's pipeline stage.
- name: owner_id
description: The ID of the deal's owner.
- name: deal_name
- name: title
description: The name you have given this deal.
- name: description
description: A brief description of the deal.
Expand Down Expand Up @@ -204,7 +204,7 @@ models:
description: The source of the record if the unioning functionality is being used. If it is not, this field will be empty.
- name: body
description: The body of the email.
- name: email_subject
- name: title
description: The subject line of the logged email.
- name: email_to_email
description: The email addresses of the email's recipients.
Expand All @@ -222,6 +222,8 @@ models:
description: Boolean to mark rows that were deleted in the source database.
- name: engagement_id
description: The ID of the engagement.
- name: title
description: The title of the engagement note.
- name: engagement_type
description: The type of the engagement.
- name: body
Expand Down
2 changes: 1 addition & 1 deletion models/staging/hubspot_staging/stg_rag_hubspot__deal.sql
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ fields as (
final as (

select
deal_name,
deal_name as title,
fivetran-joemarkiewicz marked this conversation as resolved.
Show resolved Hide resolved
source_relation,
cast(closed_date as {{ dbt.type_timestamp() }}) as closed_date,
cast(created_date as {{ dbt.type_timestamp() }}) as created_date,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ final as (
owner_id,
team_id,
coalesce(body_preview, body_preview_html, email_text, email_html) as body,
email_subject,
email_subject as title,
fivetran-joemarkiewicz marked this conversation as resolved.
Show resolved Hide resolved
email_to_email,
email_from_email,
email_cc_email
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,8 @@ final as (
occurred_timestamp,
owner_id,
team_id,
coalesce(note_body_preview, note_body, note_body_preview_html) as body
coalesce(note_body_preview, note_body, note_body_preview_html) as body,
'engagement_note' as title
from fields
)

Expand Down
2 changes: 1 addition & 1 deletion models/staging/jira_staging/stg_rag_jira.yml
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ models:
in the project's workflow).
- name: status_changed_at
description: Timestamp of when the status was last changed.
- name: issue_name
- name: title
description: Title of the issue.
- name: time_spent_seconds
description: The time that was spent working on this issue, in seconds.
Expand Down
2 changes: 1 addition & 1 deletion models/staging/jira_staging/stg_rag_jira__issue.sql
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ final as (
resolution as resolution_id,
status as status_id,
cast(status_category_changed as {{ dbt.type_timestamp() }}) as status_changed_at,
summary as issue_name,
summary as title,
fivetran-joemarkiewicz marked this conversation as resolved.
Show resolved Hide resolved
cast(updated as {{ dbt.type_timestamp() }}) as updated_at,
work_ratio,
_fivetran_synced
Expand Down
2 changes: 1 addition & 1 deletion models/staging/zendesk_staging/stg_rag_zendesk.yml
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ models:
description: When this record was created
- name: type
description: The type of this ticket, possible values are problem, incident, question or task
- name: subject
- name: title
description: The value of the subject field for this ticket
- name: description
description: Read-only first comment on the ticket
Expand Down
2 changes: 1 addition & 1 deletion models/staging/zendesk_staging/stg_rag_zendesk__ticket.sql
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ final as (
recipient,
requester_id,
status,
subject,
subject as title,
fivetran-joemarkiewicz marked this conversation as resolved.
Show resolved Hide resolved
problem_id,
submitter_id,
ticket_form_id,
Expand Down
3 changes: 3 additions & 0 deletions models/unified_rag.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@ models:
- not_null
- name: document_id
description: Identifier of the base object with which the unstructured data is associated (i.e., Zendesk ticket_id, Jira issue_id, and HubSpot deal_id).
- name: title
description: Title of the base object with which the unstructured data is associated.
- name: platform
- name: url_reference
description: URL reference to the respective base object.
- name: platform
Expand Down
1 change: 1 addition & 0 deletions models/unstructured/rag_hubspot__document.sql
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ final as (

select
cast(deal_document.deal_id as {{ dbt.type_string() }}) as document_id,
coalesce(deal_document.title, grouped.title) as title,
deal_document.url_reference,
'hubspot' as platform,
deal_document.source_relation,
Expand Down
1 change: 1 addition & 0 deletions models/unstructured/rag_jira__document.sql
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ final as (

select
cast(issue_document.issue_id as {{ dbt.type_string() }}) as document_id,
issue_document.title,
issue_document.url_reference,
'jira' as platform,
issue_document.source_relation,
Expand Down
1 change: 1 addition & 0 deletions models/unstructured/rag_zendesk__document.sql
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ with ticket_document as (
), final as (
select
cast(ticket_document.ticket_id as {{ dbt.type_string() }}) as document_id,
ticket_document.title,
ticket_document.url_reference,
'zendesk' as platform,
ticket_document.source_relation,
Expand Down
6 changes: 6 additions & 0 deletions models/unstructured/unstructured.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ models:
columns:
- name: document_id
description: Equivalent to deal_id.
- name: title
description: The title of the respective HubSpot deal. If the deal has no title, this is the engagement email subject line or the string 'engagement_note'.
- name: url_reference
description: URL reference to the respective deal in HubSpot.
- name: platform
Expand All @@ -26,6 +28,8 @@ models:
columns:
- name: document_id
description: Equivalent to issue_id.
- name: title
description: The title of the respective Jira issue.
- name: url_reference
description: URL reference to the respective Jira Issue.
- name: platform
Expand All @@ -46,6 +50,8 @@ models:
columns:
- name: document_id
description: Equivalent to ticket_id.
- name: title
description: The subject of the respective Zendesk ticket.
- name: url_reference
description: URL reference to the respective Zendesk Ticket.
- name: platform
Expand Down