Skip to content

[ISSUE] Missing data from Genie API #1154

@huyhuyvu01

Description

@huyhuyvu01

Description
I have noticed that data is missing from the responses of the Genie REST API and the Databricks SDK Genie client when compared to what the UI shows.

Reproduction
Here is a simple script that compares the responses from the Databricks REST API and the SDK; both appear to be missing data compared to the console.

import requests
import dotenv
import os
import json
from databricks.sdk import WorkspaceClient
from databricks.sdk.service.dashboards import GenieAPI

# Load variables via python-dotenv before reading the environment below.
dotenv.load_dotenv()

# Workspace URL and access token come from the environment; either may be
# None when the corresponding variable is not set.
DATABRICKS_HOST = os.environ.get("DATABRICKS_HOST")
DATABRICKS_TOKEN = os.environ.get("DATABRICKS_TOKEN")

# Genie space to query and the question sent to it.
SPACE_ID = "REDACTED"
QUESTION = "Do visitors with certain dominant topics engage differently by content type?"

# Headers shared by every raw REST request made in this script.
HEADERS = {
    "Authorization": f"Bearer {DATABRICKS_TOKEN}",
    "Content-Type": "application/json",
}

def run_api(poll_interval=2.0, max_wait=1200.0, request_timeout=60.0):
    """Ask QUESTION through the raw Genie REST API and print every response.

    Starts a conversation in SPACE_ID, then polls the created message until
    its status is COMPLETED, FAILED or CANCELLED, or until ``max_wait``
    seconds have elapsed.

    Args:
        poll_interval: Seconds to sleep between two polling requests.
        max_wait: Upper bound, in seconds, on the total time spent polling.
            Without it a message that never reaches a terminal status would
            keep the script running forever.
        request_timeout: Timeout, in seconds, passed to each HTTP request so
            that a stalled connection cannot block indefinitely.

    Returns:
        The last message JSON (as a dict) that was received, or ``None`` if
        any request or decoding step raised.
    """
    import time

    terminal_statuses = ("COMPLETED", "FAILED", "CANCELLED")

    print(f"\n[{'='*10} API RUN {'='*10}]")
    url = f"{DATABRICKS_HOST}/api/2.0/genie/spaces/{SPACE_ID}/start-conversation"
    payload = {
        "content": QUESTION
    }

    print(f"POST {url}")
    try:
        resp = requests.post(
            url, headers=HEADERS, json=payload, timeout=request_timeout
        )
        resp.raise_for_status()
        data = resp.json()
        print("Initial Response JSON:")
        print(json.dumps(data, indent=2))

        # Poll for completion
        conversation_id = data.get("conversation_id")
        message_id = data.get("message_id")

        if conversation_id and message_id:
            print("\nPolling for completion...")
            # The URL does not change between iterations, so build it once.
            poll_url = (
                f"{DATABRICKS_HOST}/api/2.0/genie/spaces/{SPACE_ID}"
                f"/conversations/{conversation_id}/messages/{message_id}"
            )
            deadline = time.monotonic() + max_wait
            while True:
                poll_resp = requests.get(
                    poll_url, headers=HEADERS, timeout=request_timeout
                )
                poll_resp.raise_for_status()
                poll_data = poll_resp.json()
                status = poll_data.get("status")
                print(f"Status: {status}")
                print("Intermediate JSON:")
                print(json.dumps(poll_data, indent=2))

                if status in terminal_statuses:
                    data = poll_data
                    break
                if time.monotonic() >= deadline:
                    # Give up, but still hand back the most recent snapshot.
                    print(f"Timed out after {max_wait} seconds; last status: {status}")
                    data = poll_data
                    break
                time.sleep(poll_interval)

        print("Final API Response JSON:")
        print(json.dumps(data, indent=2))
        return data
    except requests.exceptions.HTTPError as e:
        print(f"HTTP Error: {e}")
        # Use the response attached to the exception: it belongs to the
        # request that actually failed (initial POST or a polling GET).
        if e.response is not None:
            print(f"Response text: {e.response.text}")
    except Exception as e:
        # Diagnostic script boundary: report and fall through to return None.
        print(f"Error: {e}")
    return None

def run_sdk():
    """Ask QUESTION through the Databricks SDK Genie client and print the result.

    Builds a WorkspaceClient from DATABRICKS_HOST / DATABRICKS_TOKEN, calls
    ``start_conversation_and_wait`` on a GenieAPI bound to that client, and
    prints the result's ``as_dict()`` form as indented JSON.

    Returns:
        The object returned by ``start_conversation_and_wait``, or ``None``
        if anything raised (the error and its traceback are printed).
    """
    import traceback

    print(f"\n[{'='*10} SDK RUN {'='*10}]")
    client = WorkspaceClient(
        host=DATABRICKS_HOST, token=DATABRICKS_TOKEN, auth_type="pat"
    )
    genie_api = GenieAPI(client.api_client)

    try:
        print(f"Calling start_conversation_and_wait with space_id={SPACE_ID}, content='{QUESTION}'")
        message = genie_api.start_conversation_and_wait(
            space_id=SPACE_ID,
            content=QUESTION,
        )
        print("Response Object (as dict):")
        rendered = json.dumps(message.as_dict(), indent=2)
        print(rendered)
        return message
    except Exception as err:
        # Report the failure with full context instead of propagating it.
        print(f"SDK Error: {err}")
        traceback.print_exc()
        return None

if __name__ == "__main__":
    # Run the raw REST flow first, then the SDK flow, so that both outputs
    # appear one after the other for comparison.
    for runner in (run_api, run_sdk):
        runner()

Reference image from the console:

Image

Expected behavior
Return the full data, as shown in the Databricks chat console.

Debug Logs
The SDK logs helpful debugging information when debug logging is enabled. Set the log level to debug by adding logging.basicConfig(level=logging.DEBUG) to your program, and include the logs here.

Other Information

  • OS: Ubuntu 20.04
  • Version: 0.74.0

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions