#!/usr/bin/env python3
"""
Script to test dataset-to-dashboard relationships from Superset API.

Usage:
    cd backend && .venv/bin/python3 src/scripts/test_dataset_dashboard_relations.py
"""
import json
import sys
import traceback
from pathlib import Path

# Add src to path (parent of scripts directory)
sys.path.append(str(Path(__file__).parent.parent.parent))

from src.core.superset_client import SupersetClient
from src.core.config_manager import ConfigManager
from src.core.logger import logger


def _inspect_dashboard(client, dashboard_id):
    """Fetch one dashboard by id and print its basic fields plus per-slice
    datasource info (first 5 slices only). Returns the raw dashboard dict."""
    logger.info(f"\n=== Fetching Dashboard {dashboard_id} ===")
    dashboard = client.network.request(method="GET", endpoint=f"/dashboard/{dashboard_id}")

    print("\nDashboard structure:")
    print(f"  ID: {dashboard.get('id')}")
    print(f"  Title: {dashboard.get('dashboard_title')}")
    print(f"  Published: {dashboard.get('published')}")

    if 'slices' in dashboard:
        logger.info(f"\n  Found {len(dashboard['slices'])} slices/charts in dashboard")
        for i, slice_data in enumerate(dashboard['slices'][:5]):  # Show first 5
            print(f"  Slice {i + 1}:")
            print(f"    ID: {slice_data.get('slice_id')}")
            print(f"    Name: {slice_data.get('slice_name')}")
            # Each slice may carry its backing datasource; keys vary by
            # Superset version, so probe each one individually.
            if 'datasource_id' in slice_data:
                print(f"    Datasource ID: {slice_data['datasource_id']}")
            if 'datasource_name' in slice_data:
                print(f"    Datasource Name: {slice_data['datasource_name']}")
            if 'datasource_type' in slice_data:
                # NOTE(review): this statement was broken across a line break
                # in the original source; reconstructed as a single print.
                print(f"    Datasource Type: {slice_data['datasource_type']}")
    else:
        logger.warning("  No 'slices' field found in dashboard response")
        logger.info(f"  Available fields: {list(dashboard.keys())}")

    return dashboard


def _inspect_dataset(client, dataset_id):
    """Fetch one dataset by id and print its basic fields. Returns the raw dict."""
    logger.info(f"\n=== Fetching Dataset {dataset_id} ===")
    dataset = client.get_dataset(dataset_id)

    print("\nDataset structure:")
    print(f"  ID: {dataset.get('id')}")
    print(f"  Table Name: {dataset.get('table_name')}")
    print(f"  Schema: {dataset.get('schema')}")
    print(f"  Database: {dataset.get('database', {}).get('database_name', 'Unknown')}")

    return dataset


def _log_dashboard_list(dashboards):
    """Log each dashboard entry from a related-objects response."""
    logger.info(f"  Found {len(dashboards)} dashboards using this dataset:")
    for dash in dashboards:
        logger.info(
            f"    - Dashboard ID {dash.get('id')}: "
            f"{dash.get('dashboard_title', dash.get('title', 'Unknown'))}"
        )


def _show_related_dashboards(client, dataset_id):
    """Query Superset's related_objects endpoint and log dashboards that use
    the dataset. Response shape differs across Superset versions (top-level
    'dashboards' vs a 'result' wrapper), so both are handled. Best-effort:
    errors are logged, not raised."""
    logger.info(f"\n=== Finding Dashboards using Dataset {dataset_id} ===")
    try:
        logger.info(f"  Using /api/v1/dataset/{dataset_id}/related_objects endpoint...")
        related_objects = client.network.request(
            method="GET",
            endpoint=f"/dataset/{dataset_id}/related_objects"
        )

        logger.info(f"  Related objects response type: {type(related_objects)}")
        logger.info(
            f"  Related objects keys: "
            f"{list(related_objects.keys()) if isinstance(related_objects, dict) else 'N/A'}"
        )

        if 'dashboards' in related_objects:
            _log_dashboard_list(related_objects['dashboards'])
        elif 'result' in related_objects:
            # Some Superset versions use 'result' wrapper
            result = related_objects['result']
            if 'dashboards' in result:
                _log_dashboard_list(result['dashboards'])
            else:
                logger.warning(f"  No 'dashboards' key in result. Keys: {list(result.keys())}")
        else:
            # NOTE(review): this statement was broken across a line break in
            # the original source; reconstructed as a single warning.
            logger.warning(
                f"  No 'dashboards' key in response. "
                f"Available keys: {list(related_objects.keys())}"
            )
            logger.info("  Full related_objects response:")
            print(json.dumps(related_objects, indent=2, default=str)[:1000])

    except Exception as e:
        logger.error(f"  Error fetching related objects: {e}")
        traceback.print_exc()


def _analyze_position_json(dashboard):
    """Scan the dashboard's position_json layout blob for datasource references.

    Guards against a missing/null/empty position_json (json.loads would raise
    TypeError or JSONDecodeError on those)."""
    logger.info(f"\n=== Analyzing Dashboard Position JSON ===")
    if not dashboard.get('position_json'):
        return

    position_data = json.loads(dashboard['position_json'])
    logger.info(f"  Position data type: {type(position_data)}")

    # Look for datasource references
    datasource_ids = set()
    if isinstance(position_data, dict):
        for key, value in position_data.items():
            if 'datasource' in key.lower() or key == 'DASHBOARD_VERSION_KEY':
                logger.debug(f"    Key: {key}, Value type: {type(value)}")
    elif isinstance(position_data, list):
        logger.info(f"  Position data has {len(position_data)} items")
        for item in position_data[:3]:  # Show first 3
            logger.debug(
                f"    Item: {type(item)}, keys: "
                f"{list(item.keys()) if isinstance(item, dict) else 'N/A'}"
            )
            if isinstance(item, dict):
                if 'datasource_id' in item:
                    datasource_ids.add(item['datasource_id'])

    if datasource_ids:
        logger.info(f"  Found datasource IDs: {datasource_ids}")


def _save_analysis(dashboard, dataset):
    """Dump both raw API responses next to this script for offline analysis."""
    output_file = Path(__file__).parent / "dataset_dashboard_analysis.json"
    with open(output_file, 'w') as f:
        json.dump({
            'dashboard': dashboard,
            'dataset': dataset
        }, f, indent=2, default=str)
    logger.info(f"\nFull response saved to: {output_file}")


def test_dashboard_dataset_relations():
    """Test fetching dataset-to-dashboard relationships.

    Authenticates against the first configured Superset environment, then:
      1. fetches dashboard 13 and prints its slices/datasources,
      2. fetches dataset 26 and prints its structure,
      3. queries the related_objects endpoint for dashboards using the dataset,
      4. scans the dashboard's position_json for datasource references,
      5. saves both raw responses to a JSON file for inspection.

    Raises: re-raises any unexpected exception after logging it.
    """
    # Load environment from existing config
    config_manager = ConfigManager()
    environments = config_manager.get_environments()

    if not environments:
        logger.error("No environments configured!")
        return

    # Use first available environment
    env = environments[0]
    logger.info(f"Using environment: {env.name} ({env.url})")

    client = SupersetClient(env)

    try:
        # Authenticate
        logger.info("Authenticating to Superset...")
        client.authenticate()
        logger.info("Authentication successful!")

        dashboard = _inspect_dashboard(client, dashboard_id=13)
        dataset = _inspect_dataset(client, dataset_id=26)

        _show_related_dashboards(client, dataset_id=26)
        _analyze_position_json(dashboard)
        _save_analysis(dashboard, dataset)

    except Exception as e:
        logger.error(f"Error: {e}", exc_info=True)
        raise


if __name__ == "__main__":
    test_dashboard_dataset_relations()