fix
@@ -12,9 +12,9 @@
 # [SECTION: IMPORTS]
 from fastapi import APIRouter, Depends, HTTPException
-from typing import List, Optional
+from typing import List, Optional, Dict
 from pydantic import BaseModel, Field
-from ...dependencies import get_config_manager, get_task_manager, get_resource_service, has_permission
+from ...dependencies import get_config_manager, get_task_manager, get_resource_service, get_mapping_service, has_permission
 from ...core.logger import logger, belief_scope
 # [/SECTION]

@@ -47,26 +47,44 @@ class DashboardItem(BaseModel):
 class DashboardsResponse(BaseModel):
     dashboards: List[DashboardItem]
     total: int
+    page: int
+    page_size: int
+    total_pages: int
 # [/DEF:DashboardsResponse:DataClass]

 # [DEF:get_dashboards:Function]
 # @PURPOSE: Fetch list of dashboards from a specific environment with Git status and last task status
 # @PRE: env_id must be a valid environment ID
-# @POST: Returns a list of dashboards with enhanced metadata
+# @PRE: page must be >= 1 if provided
+# @PRE: page_size must be between 1 and 100 if provided
+# @POST: Returns a list of dashboards with enhanced metadata and pagination info
+# @POST: Response includes pagination metadata (page, page_size, total, total_pages)
 # @PARAM: env_id (str) - The environment ID to fetch dashboards from
 # @PARAM: search (Optional[str]) - Filter by title/slug
+# @PARAM: page (Optional[int]) - Page number (default: 1)
+# @PARAM: page_size (Optional[int]) - Items per page (default: 10, max: 100)
 # @RETURN: DashboardsResponse - List of dashboards with status metadata
 # @RELATION: CALLS -> ResourceService.get_dashboards_with_status
 @router.get("/api/dashboards", response_model=DashboardsResponse)
 async def get_dashboards(
     env_id: str,
     search: Optional[str] = None,
+    page: int = 1,
+    page_size: int = 10,
     config_manager=Depends(get_config_manager),
     task_manager=Depends(get_task_manager),
     resource_service=Depends(get_resource_service),
     _ = Depends(has_permission("plugin:migration", "READ"))
 ):
-    with belief_scope("get_dashboards", f"env_id={env_id}, search={search}"):
+    with belief_scope("get_dashboards", f"env_id={env_id}, search={search}, page={page}, page_size={page_size}"):
+        # Validate pagination parameters
+        if page < 1:
+            logger.error(f"[get_dashboards][Coherence:Failed] Invalid page: {page}")
+            raise HTTPException(status_code=400, detail="Page must be >= 1")
+        if page_size < 1 or page_size > 100:
+            logger.error(f"[get_dashboards][Coherence:Failed] Invalid page_size: {page_size}")
+            raise HTTPException(status_code=400, detail="Page size must be between 1 and 100")
+
         # Validate environment exists
         environments = config_manager.get_environments()
         env = next((e for e in environments if e.id == env_id), None)

@@ -90,11 +108,23 @@ async def get_dashboards(
                     or search_lower in d.get('slug', '').lower()
                 ]

-            logger.info(f"[get_dashboards][Coherence:OK] Returning {len(dashboards)} dashboards")
+            # Calculate pagination
+            total = len(dashboards)
+            total_pages = (total + page_size - 1) // page_size if total > 0 else 1
+            start_idx = (page - 1) * page_size
+            end_idx = start_idx + page_size
+
+            # Slice dashboards for current page
+            paginated_dashboards = dashboards[start_idx:end_idx]
+
+            logger.info(f"[get_dashboards][Coherence:OK] Returning {len(paginated_dashboards)} dashboards (page {page}/{total_pages}, total: {total})")

             return DashboardsResponse(
-                dashboards=dashboards,
-                total=len(dashboards)
+                dashboards=paginated_dashboards,
+                total=total,
+                page=page,
+                page_size=page_size,
+                total_pages=total_pages
             )

         except Exception as e:

@@ -102,4 +132,192 @@ async def get_dashboards(
             raise HTTPException(status_code=503, detail=f"Failed to fetch dashboards: {str(e)}")
 # [/DEF:get_dashboards:Function]
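The page arithmetic above is plain ceiling division; a quick standalone check of the same formula (not part of the commit):

    total = 43                     # dashboards left after the search filter
    page_size = 10
    total_pages = (total + page_size - 1) // page_size if total > 0 else 1
    assert total_pages == 5        # ceil(43 / 10)
    page = 5                       # last page
    start_idx = (page - 1) * page_size
    items = list(range(total))[start_idx:start_idx + page_size]
    assert len(items) == 3         # the remainder lands on the final page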

+# [DEF:MigrateRequest:DataClass]
+class MigrateRequest(BaseModel):
+    source_env_id: str = Field(..., description="Source environment ID")
+    target_env_id: str = Field(..., description="Target environment ID")
+    dashboard_ids: List[int] = Field(..., description="List of dashboard IDs to migrate")
+    db_mappings: Optional[Dict[str, str]] = Field(None, description="Database mappings for migration")
+    replace_db_config: bool = Field(False, description="Replace database configuration")
+# [/DEF:MigrateRequest:DataClass]
+
+# [DEF:TaskResponse:DataClass]
+class TaskResponse(BaseModel):
+    task_id: str
+# [/DEF:TaskResponse:DataClass]
+
+# [DEF:migrate_dashboards:Function]
+# @PURPOSE: Trigger bulk migration of dashboards from source to target environment
+# @PRE: User has permission plugin:migration:execute
+# @PRE: source_env_id and target_env_id are valid environment IDs
+# @PRE: dashboard_ids is a non-empty list
+# @POST: Returns task_id for tracking migration progress
+# @POST: Task is created and queued for execution
+# @PARAM: request (MigrateRequest) - Migration request with source, target, and dashboard IDs
+# @RETURN: TaskResponse - Task ID for tracking
+# @RELATION: DISPATCHES -> MigrationPlugin
+# @RELATION: CALLS -> task_manager.create_task
+@router.post("/api/dashboards/migrate", response_model=TaskResponse)
+async def migrate_dashboards(
+    request: MigrateRequest,
+    config_manager=Depends(get_config_manager),
+    task_manager=Depends(get_task_manager),
+    _ = Depends(has_permission("plugin:migration", "EXECUTE"))
+):
+    with belief_scope("migrate_dashboards", f"source={request.source_env_id}, target={request.target_env_id}, count={len(request.dashboard_ids)}"):
+        # Validate request
+        if not request.dashboard_ids:
+            logger.error("[migrate_dashboards][Coherence:Failed] No dashboard IDs provided")
+            raise HTTPException(status_code=400, detail="At least one dashboard ID must be provided")
+
+        # Validate environments exist
+        environments = config_manager.get_environments()
+        source_env = next((e for e in environments if e.id == request.source_env_id), None)
+        target_env = next((e for e in environments if e.id == request.target_env_id), None)
+
+        if not source_env:
+            logger.error(f"[migrate_dashboards][Coherence:Failed] Source environment not found: {request.source_env_id}")
+            raise HTTPException(status_code=404, detail="Source environment not found")
+        if not target_env:
+            logger.error(f"[migrate_dashboards][Coherence:Failed] Target environment not found: {request.target_env_id}")
+            raise HTTPException(status_code=404, detail="Target environment not found")
+
+        try:
+            # Create migration task
+            task_params = {
+                'source_env_id': request.source_env_id,
+                'target_env_id': request.target_env_id,
+                'dashboards': request.dashboard_ids,
+                'replace_db_config': request.replace_db_config,
+                'db_mappings': request.db_mappings or {}
+            }
+
+            task_id = await task_manager.create_task(
+                plugin_id='superset-migration',
+                params=task_params
+            )
+
+            logger.info(f"[migrate_dashboards][Coherence:OK] Migration task created: {task_id} for {len(request.dashboard_ids)} dashboards")
+
+            return TaskResponse(task_id=str(task_id))
+
+        except Exception as e:
+            logger.error(f"[migrate_dashboards][Coherence:Failed] Failed to create migration task: {e}")
+            raise HTTPException(status_code=503, detail=f"Failed to create migration task: {str(e)}")
+# [/DEF:migrate_dashboards:Function]
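For reference, a client-side sketch of calling the endpoint above; httpx, the host/port, the token, and the environment IDs are illustrative assumptions, not part of the commit:

    import httpx

    resp = httpx.post(
        "http://localhost:8000/api/dashboards/migrate",
        headers={"Authorization": "Bearer <token>"},  # placeholder token
        json={
            "source_env_id": "dev",                   # hypothetical environment IDs
            "target_env_id": "prod",
            "dashboard_ids": [13, 27],
            "db_mappings": {"dev_db": "prod_db"},
            "replace_db_config": False,
        },
    )
    resp.raise_for_status()
    task_id = resp.json()["task_id"]  # poll the tasks API with this ID

The endpoint only enqueues the work; progress and failures surface through the returned task, not the HTTP response.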
+
+# [DEF:BackupRequest:DataClass]
+class BackupRequest(BaseModel):
+    env_id: str = Field(..., description="Environment ID")
+    dashboard_ids: List[int] = Field(..., description="List of dashboard IDs to backup")
+    schedule: Optional[str] = Field(None, description="Cron schedule for recurring backups (e.g., '0 0 * * *')")
+# [/DEF:BackupRequest:DataClass]
+
+# [DEF:backup_dashboards:Function]
+# @PURPOSE: Trigger bulk backup of dashboards with optional cron schedule
+# @PRE: User has permission plugin:backup:execute
+# @PRE: env_id is a valid environment ID
+# @PRE: dashboard_ids is a non-empty list
+# @POST: Returns task_id for tracking backup progress
+# @POST: Task is created and queued for execution
+# @POST: If schedule is provided, a scheduled task is created
+# @PARAM: request (BackupRequest) - Backup request with environment and dashboard IDs
+# @RETURN: TaskResponse - Task ID for tracking
+# @RELATION: DISPATCHES -> BackupPlugin
+# @RELATION: CALLS -> task_manager.create_task
+@router.post("/api/dashboards/backup", response_model=TaskResponse)
+async def backup_dashboards(
+    request: BackupRequest,
+    config_manager=Depends(get_config_manager),
+    task_manager=Depends(get_task_manager),
+    _ = Depends(has_permission("plugin:backup", "EXECUTE"))
+):
+    with belief_scope("backup_dashboards", f"env={request.env_id}, count={len(request.dashboard_ids)}, schedule={request.schedule}"):
+        # Validate request
+        if not request.dashboard_ids:
+            logger.error("[backup_dashboards][Coherence:Failed] No dashboard IDs provided")
+            raise HTTPException(status_code=400, detail="At least one dashboard ID must be provided")
+
+        # Validate environment exists
+        environments = config_manager.get_environments()
+        env = next((e for e in environments if e.id == request.env_id), None)
+
+        if not env:
+            logger.error(f"[backup_dashboards][Coherence:Failed] Environment not found: {request.env_id}")
+            raise HTTPException(status_code=404, detail="Environment not found")
+
+        try:
+            # Create backup task
+            task_params = {
+                'env': request.env_id,
+                'dashboards': request.dashboard_ids,
+                'schedule': request.schedule
+            }
+
+            task_id = await task_manager.create_task(
+                plugin_id='superset-backup',
+                params=task_params
+            )
+
+            logger.info(f"[backup_dashboards][Coherence:OK] Backup task created: {task_id} for {len(request.dashboard_ids)} dashboards")
+
+            return TaskResponse(task_id=str(task_id))
+
+        except Exception as e:
+            logger.error(f"[backup_dashboards][Coherence:Failed] Failed to create backup task: {e}")
+            raise HTTPException(status_code=503, detail=f"Failed to create backup task: {str(e)}")
+# [/DEF:backup_dashboards:Function]
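The optional schedule field takes a standard five-field cron expression, as the Field description shows. The two request shapes, sketched with illustrative values:

    # One-off backup: runs once as soon as the task is picked up.
    one_off = {"env_id": "prod", "dashboard_ids": [13]}

    # Recurring backup: '0 0 * * *' = minute 0, hour 0, every day (midnight).
    recurring = {"env_id": "prod", "dashboard_ids": [13], "schedule": "0 0 * * *"}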
+
+# [DEF:DatabaseMapping:DataClass]
+class DatabaseMapping(BaseModel):
+    source_db: str
+    target_db: str
+    confidence: float
+# [/DEF:DatabaseMapping:DataClass]
+
+# [DEF:DatabaseMappingsResponse:DataClass]
+class DatabaseMappingsResponse(BaseModel):
+    mappings: List[DatabaseMapping]
+# [/DEF:DatabaseMappingsResponse:DataClass]
+
+# [DEF:get_database_mappings:Function]
+# @PURPOSE: Get database mapping suggestions between source and target environments
+# @PRE: User has permission plugin:migration:read
+# @PRE: source_env_id and target_env_id are valid environment IDs
+# @POST: Returns list of suggested database mappings with confidence scores
+# @PARAM: source_env_id (str) - Source environment ID
+# @PARAM: target_env_id (str) - Target environment ID
+# @RETURN: DatabaseMappingsResponse - List of suggested mappings
+# @RELATION: CALLS -> MappingService.get_suggestions
+@router.get("/api/dashboards/db-mappings", response_model=DatabaseMappingsResponse)
+async def get_database_mappings(
+    source_env_id: str,
+    target_env_id: str,
+    mapping_service=Depends(get_mapping_service),
+    _ = Depends(has_permission("plugin:migration", "READ"))
+):
+    with belief_scope("get_database_mappings", f"source={source_env_id}, target={target_env_id}"):
+        try:
+            # Get mapping suggestions using MappingService
+            suggestions = await mapping_service.get_suggestions(source_env_id, target_env_id)
+
+            # Format suggestions as DatabaseMapping objects
+            mappings = [
+                DatabaseMapping(
+                    source_db=s.get('source_db', ''),
+                    target_db=s.get('target_db', ''),
+                    confidence=s.get('confidence', 0.0)
+                )
+                for s in suggestions
+            ]
+
+            logger.info(f"[get_database_mappings][Coherence:OK] Returning {len(mappings)} database mapping suggestions")
+
+            return DatabaseMappingsResponse(mappings=mappings)
+
+        except Exception as e:
+            logger.error(f"[get_database_mappings][Coherence:Failed] Failed to get database mappings: {e}")
+            raise HTTPException(status_code=503, detail=f"Failed to get database mappings: {str(e)}")
+# [/DEF:get_database_mappings:Function]
+
 # [/DEF:backend.src.api.routes.dashboards:Module]

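A response from the GET /api/dashboards/db-mappings endpoint above reduces each suggestion to the three DatabaseMapping fields, e.g. (illustrative values):

    {
        "mappings": [
            {"source_db": "analytics", "target_db": "analytics_prod", "confidence": 0.92},
            {"source_db": "staging", "target_db": "staging_prod", "confidence": 0.78}
        ]
    }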
@@ -16,6 +16,7 @@ from typing import List, Optional
 from pydantic import BaseModel, Field
 from ...dependencies import get_config_manager, get_task_manager, get_resource_service, has_permission
 from ...core.logger import logger, belief_scope
+from ...core.superset_client import SupersetClient
 # [/SECTION]

 router = APIRouter()

@@ -42,22 +43,64 @@ class DatasetItem(BaseModel):
     last_task: Optional[LastTask] = None
 # [/DEF:DatasetItem:DataClass]

+# [DEF:LinkedDashboard:DataClass]
+class LinkedDashboard(BaseModel):
+    id: int
+    title: str
+    slug: Optional[str] = None
+# [/DEF:LinkedDashboard:DataClass]
+
+# [DEF:DatasetColumn:DataClass]
+class DatasetColumn(BaseModel):
+    id: int
+    name: str
+    type: Optional[str] = None
+    is_dttm: bool = False
+    is_active: bool = True
+    description: Optional[str] = None
+# [/DEF:DatasetColumn:DataClass]
+
+# [DEF:DatasetDetailResponse:DataClass]
+class DatasetDetailResponse(BaseModel):
+    id: int
+    table_name: str
+    schema: str
+    database: str
+    description: Optional[str] = None
+    columns: List[DatasetColumn]
+    column_count: int
+    sql: Optional[str] = None
+    linked_dashboards: List[LinkedDashboard]
+    linked_dashboard_count: int
+    is_sqllab_view: bool = False
+    created_on: Optional[str] = None
+    changed_on: Optional[str] = None
+# [/DEF:DatasetDetailResponse:DataClass]
+
 # [DEF:DatasetsResponse:DataClass]
 class DatasetsResponse(BaseModel):
     datasets: List[DatasetItem]
     total: int
+    page: int
+    page_size: int
+    total_pages: int
 # [/DEF:DatasetsResponse:DataClass]

-# [DEF:get_datasets:Function]
-# @PURPOSE: Fetch list of datasets from a specific environment with mapping progress
-# @PRE: env_id must be a valid environment ID
-# @POST: Returns a list of datasets with enhanced metadata
-# @PARAM: env_id (str) - The environment ID to fetch datasets from
-# @PARAM: search (Optional[str]) - Filter by table name
-# @RETURN: DatasetsResponse - List of datasets with status metadata
+# [DEF:TaskResponse:DataClass]
+class TaskResponse(BaseModel):
+    task_id: str
+# [/DEF:TaskResponse:DataClass]
+
+# [DEF:get_dataset_ids:Function]
+# @PURPOSE: Fetch list of all dataset IDs from a specific environment (without pagination)
+# @PRE: env_id must be a valid environment ID
+# @POST: Returns a list of all dataset IDs
+# @PARAM: env_id (str) - The environment ID to fetch datasets from
+# @PARAM: search (Optional[str]) - Filter by table name
+# @RETURN: List[int] - List of dataset IDs
 # @RELATION: CALLS -> ResourceService.get_datasets_with_status
-@router.get("/api/datasets", response_model=DatasetsResponse)
-async def get_datasets(
+@router.get("/api/datasets/ids")
+async def get_dataset_ids(
     env_id: str,
     search: Optional[str] = None,
     config_manager=Depends(get_config_manager),

@@ -65,7 +108,73 @@ async def get_datasets(
     resource_service=Depends(get_resource_service),
     _ = Depends(has_permission("plugin:migration", "READ"))
 ):
-    with belief_scope("get_datasets", f"env_id={env_id}, search={search}"):
+    with belief_scope("get_dataset_ids", f"env_id={env_id}, search={search}"):
         # Validate environment exists
         environments = config_manager.get_environments()
         env = next((e for e in environments if e.id == env_id), None)
+        if not env:
+            logger.error(f"[get_dataset_ids][Coherence:Failed] Environment not found: {env_id}")
+            raise HTTPException(status_code=404, detail="Environment not found")
+
+        try:
+            # Get all tasks for status lookup
+            all_tasks = task_manager.get_all_tasks()
+
+            # Fetch datasets with status using ResourceService
+            datasets = await resource_service.get_datasets_with_status(env, all_tasks)
+
+            # Apply search filter if provided
+            if search:
+                search_lower = search.lower()
+                datasets = [
+                    d for d in datasets
+                    if search_lower in d.get('table_name', '').lower()
+                ]
+
+            # Extract and return just the IDs
+            dataset_ids = [d['id'] for d in datasets]
+            logger.info(f"[get_dataset_ids][Coherence:OK] Returning {len(dataset_ids)} dataset IDs")
+
+            return {"dataset_ids": dataset_ids}
+
+        except Exception as e:
+            logger.error(f"[get_dataset_ids][Coherence:Failed] Failed to fetch dataset IDs: {e}")
+            raise HTTPException(status_code=503, detail=f"Failed to fetch dataset IDs: {str(e)}")
+# [/DEF:get_dataset_ids:Function]
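The ids endpoint lets a client implement "select all across pages" without walking every page of /api/datasets. A usage sketch (host and token are placeholders, not part of the commit):

    import httpx

    resp = httpx.get(
        "http://localhost:8000/api/datasets/ids",
        params={"env_id": "prod", "search": "sales"},   # hypothetical values
        headers={"Authorization": "Bearer <token>"},
    )
    all_ids = resp.json()["dataset_ids"]  # every matching ID, unpaginated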
+
+# [DEF:get_datasets:Function]
+# @PURPOSE: Fetch list of datasets from a specific environment with mapping progress
+# @PRE: env_id must be a valid environment ID
+# @PRE: page must be >= 1 if provided
+# @PRE: page_size must be between 1 and 100 if provided
+# @POST: Returns a list of datasets with enhanced metadata and pagination info
+# @POST: Response includes pagination metadata (page, page_size, total, total_pages)
+# @PARAM: env_id (str) - The environment ID to fetch datasets from
+# @PARAM: search (Optional[str]) - Filter by table name
+# @PARAM: page (Optional[int]) - Page number (default: 1)
+# @PARAM: page_size (Optional[int]) - Items per page (default: 10, max: 100)
+# @RETURN: DatasetsResponse - List of datasets with status metadata
+# @RELATION: CALLS -> ResourceService.get_datasets_with_status
+@router.get("/api/datasets", response_model=DatasetsResponse)
+async def get_datasets(
+    env_id: str,
+    search: Optional[str] = None,
+    page: int = 1,
+    page_size: int = 10,
+    config_manager=Depends(get_config_manager),
+    task_manager=Depends(get_task_manager),
+    resource_service=Depends(get_resource_service),
+    _ = Depends(has_permission("plugin:migration", "READ"))
+):
+    with belief_scope("get_datasets", f"env_id={env_id}, search={search}, page={page}, page_size={page_size}"):
+        # Validate pagination parameters
+        if page < 1:
+            logger.error(f"[get_datasets][Coherence:Failed] Invalid page: {page}")
+            raise HTTPException(status_code=400, detail="Page must be >= 1")
+        if page_size < 1 or page_size > 100:
+            logger.error(f"[get_datasets][Coherence:Failed] Invalid page_size: {page_size}")
+            raise HTTPException(status_code=400, detail="Page size must be between 1 and 100")
+
+        # Validate environment exists
+        environments = config_manager.get_environments()
+        env = next((e for e in environments if e.id == env_id), None)

@@ -88,11 +197,23 @@ async def get_datasets(
                     if search_lower in d.get('table_name', '').lower()
                 ]

-            logger.info(f"[get_datasets][Coherence:OK] Returning {len(datasets)} datasets")
+            # Calculate pagination
+            total = len(datasets)
+            total_pages = (total + page_size - 1) // page_size if total > 0 else 1
+            start_idx = (page - 1) * page_size
+            end_idx = start_idx + page_size
+
+            # Slice datasets for current page
+            paginated_datasets = datasets[start_idx:end_idx]
+
+            logger.info(f"[get_datasets][Coherence:OK] Returning {len(paginated_datasets)} datasets (page {page}/{total_pages}, total: {total})")

             return DatasetsResponse(
-                datasets=datasets,
-                total=len(datasets)
+                datasets=paginated_datasets,
+                total=total,
+                page=page,
+                page_size=page_size,
+                total_pages=total_pages
             )

         except Exception as e:

@@ -100,4 +221,175 @@ async def get_datasets(
             raise HTTPException(status_code=503, detail=f"Failed to fetch datasets: {str(e)}")
 # [/DEF:get_datasets:Function]
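Note that a page past the last one is not rejected: validation only bounds the raw inputs, so an out-of-range page returns an empty list with the usual metadata. The slice semantics that make this safe:

    datasets = list(range(25))   # 25 items at page_size 10 -> 3 pages
    page, page_size = 4, 10      # one page past the end
    start_idx = (page - 1) * page_size
    assert datasets[start_idx:start_idx + page_size] == []  # empty, no IndexError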
+
+# [DEF:MapColumnsRequest:DataClass]
+class MapColumnsRequest(BaseModel):
+    env_id: str = Field(..., description="Environment ID")
+    dataset_ids: List[int] = Field(..., description="List of dataset IDs to map")
+    source_type: str = Field(..., description="Source type: 'postgresql' or 'xlsx'")
+    connection_id: Optional[str] = Field(None, description="Connection ID for PostgreSQL source")
+    file_data: Optional[str] = Field(None, description="File path or data for XLSX source")
+# [/DEF:MapColumnsRequest:DataClass]
+
+# [DEF:map_columns:Function]
+# @PURPOSE: Trigger bulk column mapping for datasets
+# @PRE: User has permission plugin:mapper:execute
+# @PRE: env_id is a valid environment ID
+# @PRE: dataset_ids is a non-empty list
+# @POST: Returns task_id for tracking mapping progress
+# @POST: Task is created and queued for execution
+# @PARAM: request (MapColumnsRequest) - Mapping request with environment and dataset IDs
+# @RETURN: TaskResponse - Task ID for tracking
+# @RELATION: DISPATCHES -> MapperPlugin
+# @RELATION: CALLS -> task_manager.create_task
+@router.post("/api/datasets/map-columns", response_model=TaskResponse)
+async def map_columns(
+    request: MapColumnsRequest,
+    config_manager=Depends(get_config_manager),
+    task_manager=Depends(get_task_manager),
+    _ = Depends(has_permission("plugin:mapper", "EXECUTE"))
+):
+    with belief_scope("map_columns", f"env={request.env_id}, count={len(request.dataset_ids)}, source={request.source_type}"):
+        # Validate request
+        if not request.dataset_ids:
+            logger.error("[map_columns][Coherence:Failed] No dataset IDs provided")
+            raise HTTPException(status_code=400, detail="At least one dataset ID must be provided")
+
+        # Validate source type
+        if request.source_type not in ['postgresql', 'xlsx']:
+            logger.error(f"[map_columns][Coherence:Failed] Invalid source type: {request.source_type}")
+            raise HTTPException(status_code=400, detail="Source type must be 'postgresql' or 'xlsx'")
+
+        # Validate environment exists
+        environments = config_manager.get_environments()
+        env = next((e for e in environments if e.id == request.env_id), None)
+
+        if not env:
+            logger.error(f"[map_columns][Coherence:Failed] Environment not found: {request.env_id}")
+            raise HTTPException(status_code=404, detail="Environment not found")
+
+        try:
+            # Create mapping task
+            task_params = {
+                'env_id': request.env_id,
+                'datasets': request.dataset_ids,
+                'source_type': request.source_type,
+                'connection_id': request.connection_id,
+                'file_data': request.file_data
+            }
+
+            task_id = await task_manager.create_task(
+                plugin_id='dataset-mapper',
+                params=task_params
+            )
+
+            logger.info(f"[map_columns][Coherence:OK] Mapping task created: {task_id} for {len(request.dataset_ids)} datasets")
+
+            return TaskResponse(task_id=str(task_id))
+
+        except Exception as e:
+            logger.error(f"[map_columns][Coherence:Failed] Failed to create mapping task: {e}")
+            raise HTTPException(status_code=503, detail=f"Failed to create mapping task: {str(e)}")
+# [/DEF:map_columns:Function]
+
+# [DEF:GenerateDocsRequest:DataClass]
+class GenerateDocsRequest(BaseModel):
+    env_id: str = Field(..., description="Environment ID")
+    dataset_ids: List[int] = Field(..., description="List of dataset IDs to generate docs for")
+    llm_provider: str = Field(..., description="LLM provider to use")
+    options: Optional[dict] = Field(None, description="Additional options for documentation generation")
+# [/DEF:GenerateDocsRequest:DataClass]
+
+# [DEF:generate_docs:Function]
+# @PURPOSE: Trigger bulk documentation generation for datasets
+# @PRE: User has permission plugin:llm_analysis:execute
+# @PRE: env_id is a valid environment ID
+# @PRE: dataset_ids is a non-empty list
+# @POST: Returns task_id for tracking documentation generation progress
+# @POST: Task is created and queued for execution
+# @PARAM: request (GenerateDocsRequest) - Documentation generation request
+# @RETURN: TaskResponse - Task ID for tracking
+# @RELATION: DISPATCHES -> LLMAnalysisPlugin
+# @RELATION: CALLS -> task_manager.create_task
+@router.post("/api/datasets/generate-docs", response_model=TaskResponse)
+async def generate_docs(
+    request: GenerateDocsRequest,
+    config_manager=Depends(get_config_manager),
+    task_manager=Depends(get_task_manager),
+    _ = Depends(has_permission("plugin:llm_analysis", "EXECUTE"))
+):
+    with belief_scope("generate_docs", f"env={request.env_id}, count={len(request.dataset_ids)}, provider={request.llm_provider}"):
+        # Validate request
+        if not request.dataset_ids:
+            logger.error("[generate_docs][Coherence:Failed] No dataset IDs provided")
+            raise HTTPException(status_code=400, detail="At least one dataset ID must be provided")
+
+        # Validate environment exists
+        environments = config_manager.get_environments()
+        env = next((e for e in environments if e.id == request.env_id), None)
+
+        if not env:
+            logger.error(f"[generate_docs][Coherence:Failed] Environment not found: {request.env_id}")
+            raise HTTPException(status_code=404, detail="Environment not found")
+
+        try:
+            # Create documentation generation task
+            task_params = {
+                'env_id': request.env_id,
+                'datasets': request.dataset_ids,
+                'llm_provider': request.llm_provider,
+                'options': request.options or {}
+            }
+
+            task_id = await task_manager.create_task(
+                plugin_id='llm_documentation',
+                params=task_params
+            )
+
+            logger.info(f"[generate_docs][Coherence:OK] Documentation generation task created: {task_id} for {len(request.dataset_ids)} datasets")
+
+            return TaskResponse(task_id=str(task_id))
+
+        except Exception as e:
+            logger.error(f"[generate_docs][Coherence:Failed] Failed to create documentation generation task: {e}")
+            raise HTTPException(status_code=503, detail=f"Failed to create documentation generation task: {str(e)}")
+# [/DEF:generate_docs:Function]
+
+# [DEF:get_dataset_detail:Function]
+# @PURPOSE: Get detailed dataset information including columns and linked dashboards
+# @PRE: env_id is a valid environment ID
+# @PRE: dataset_id is a valid dataset ID
+# @POST: Returns detailed dataset info with columns and linked dashboards
+# @PARAM: env_id (str) - The environment ID
+# @PARAM: dataset_id (int) - The dataset ID
+# @RETURN: DatasetDetailResponse - Detailed dataset information
+# @RELATION: CALLS -> SupersetClient.get_dataset_detail
+@router.get("/api/datasets/{dataset_id}", response_model=DatasetDetailResponse)
+async def get_dataset_detail(
+    env_id: str,
+    dataset_id: int,
+    config_manager=Depends(get_config_manager),
+    _ = Depends(has_permission("plugin:migration", "READ"))
+):
+    with belief_scope("get_dataset_detail", f"env_id={env_id}, dataset_id={dataset_id}"):
+        # Validate environment exists
+        environments = config_manager.get_environments()
+        env = next((e for e in environments if e.id == env_id), None)
+        if not env:
+            logger.error(f"[get_dataset_detail][Coherence:Failed] Environment not found: {env_id}")
+            raise HTTPException(status_code=404, detail="Environment not found")
+
+        try:
+            # Fetch detailed dataset info using SupersetClient
+            client = SupersetClient(env)
+            dataset_detail = client.get_dataset_detail(dataset_id)
+
+            logger.info(f"[get_dataset_detail][Coherence:OK] Retrieved dataset {dataset_id} with {dataset_detail['column_count']} columns and {dataset_detail['linked_dashboard_count']} linked dashboards")
+
+            return DatasetDetailResponse(**dataset_detail)
+
+        except Exception as e:
+            logger.error(f"[get_dataset_detail][Coherence:Failed] Failed to fetch dataset detail: {e}")
+            raise HTTPException(status_code=503, detail=f"Failed to fetch dataset detail: {str(e)}")
+# [/DEF:get_dataset_detail:Function]
+
 # [/DEF:backend.src.api.routes.datasets:Module]

@@ -303,11 +303,11 @@ async def get_consolidated_settings(
     config = config_manager.get_config()

     return ConsolidatedSettingsResponse(
-        environments=config.environments,
+        environments=[env.dict() for env in config.environments],
         connections=config.settings.connections,
         llm=config.settings.llm,
-        logging=config.settings.logging,
-        storage=config.settings.storage
+        logging=config.settings.logging.dict(),
+        storage=config.settings.storage.dict()
     )
 # [/DEF:get_consolidated_settings:Function]
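The .dict() calls convert nested pydantic models into plain dicts so they match the response model's loosely-typed fields. A minimal illustration of the pydantic v1 idiom used here (the model is a stand-in, not the real config class):

    from pydantic import BaseModel

    class LoggingConfigSketch(BaseModel):  # stand-in for LoggingConfig
        level: str = "INFO"

    assert LoggingConfigSketch().dict() == {"level": "INFO"}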

@@ -115,7 +115,7 @@ app.include_router(plugins.router, prefix="/api/plugins", tags=["Plugins"])
 app.include_router(tasks.router, prefix="/api/tasks", tags=["Tasks"])
 app.include_router(settings.router, prefix="/api/settings", tags=["Settings"])
 app.include_router(connections.router, prefix="/api/settings/connections", tags=["Connections"])
-app.include_router(environments.router, prefix="/api/environments", tags=["Environments"])
+app.include_router(environments.router, prefix="/api/settings/environments", tags=["Environments"])
 app.include_router(mappings.router)
 app.include_router(migration.router)
 app.include_router(git.router)

@@ -48,6 +48,8 @@ class GlobalSettings(BaseModel):
     storage: StorageConfig = Field(default_factory=StorageConfig)
     default_environment_id: Optional[str] = None
     logging: LoggingConfig = Field(default_factory=LoggingConfig)
+    connections: List[dict] = []
+    llm: dict = Field(default_factory=lambda: {"providers": [], "default_provider": ""})

     # Task retention settings
     task_retention_days: int = 30
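With the two added fields, a default GlobalSettings carries empty connection and LLM settings. A reduced sketch of just the new defaults (stand-in model, assuming pydantic v1 as elsewhere in the commit):

    from typing import List
    from pydantic import BaseModel, Field

    class GlobalSettingsSketch(BaseModel):  # only the two new fields
        connections: List[dict] = []
        llm: dict = Field(default_factory=lambda: {"providers": [], "default_provider": ""})

    s = GlobalSettingsSketch()
    assert s.connections == [] and s.llm["default_provider"] == ""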

@@ -236,6 +236,82 @@ class SupersetClient:
             return result
     # [/DEF:get_datasets_summary:Function]

+    # [DEF:get_dataset_detail:Function]
+    # @PURPOSE: Fetches detailed dataset information including columns and linked dashboards
+    # @PRE: Client is authenticated and dataset_id exists.
+    # @POST: Returns detailed dataset info with columns and linked dashboards.
+    # @PARAM: dataset_id (int) - The dataset ID to fetch details for.
+    # @RETURN: Dict - Dataset details with columns and linked_dashboards.
+    # @RELATION: CALLS -> self.get_dataset
+    # @RELATION: CALLS -> self.network.request (for related_objects)
+    def get_dataset_detail(self, dataset_id: int) -> Dict:
+        with belief_scope("SupersetClient.get_dataset_detail", f"id={dataset_id}"):
+            # Get base dataset info
+            dataset = self.get_dataset(dataset_id)
+
+            # Extract columns information
+            columns = dataset.get("columns", [])
+            column_info = []
+            for col in columns:
+                column_info.append({
+                    "id": col.get("id"),
+                    "name": col.get("column_name"),
+                    "type": col.get("type"),
+                    "is_dttm": col.get("is_dttm", False),
+                    "is_active": col.get("is_active", True),
+                    "description": col.get("description", "")
+                })
+
+            # Get linked dashboards using related_objects endpoint
+            linked_dashboards = []
+            try:
+                related_objects = self.network.request(
+                    method="GET",
+                    endpoint=f"/dataset/{dataset_id}/related_objects"
+                )
+
+                # Handle different response formats
+                if isinstance(related_objects, dict):
+                    if "dashboards" in related_objects:
+                        dashboards_data = related_objects["dashboards"]
+                    elif "result" in related_objects and isinstance(related_objects["result"], dict):
+                        dashboards_data = related_objects["result"].get("dashboards", [])
+                    else:
+                        dashboards_data = []
+
+                    for dash in dashboards_data:
+                        linked_dashboards.append({
+                            "id": dash.get("id"),
+                            "title": dash.get("dashboard_title") or dash.get("title", "Unknown"),
+                            "slug": dash.get("slug")
+                        })
+            except Exception as e:
+                app_logger.warning(f"[get_dataset_detail][Warning] Failed to fetch related dashboards: {e}")
+                linked_dashboards = []
+
+            # Extract SQL table information
+            sql = dataset.get("sql", "")
+
+            result = {
+                "id": dataset.get("id"),
+                "table_name": dataset.get("table_name"),
+                "schema": dataset.get("schema"),
+                "database": dataset.get("database", {}).get("database_name", "Unknown"),
+                "description": dataset.get("description", ""),
+                "columns": column_info,
+                "column_count": len(column_info),
+                "sql": sql,
+                "linked_dashboards": linked_dashboards,
+                "linked_dashboard_count": len(linked_dashboards),
+                "is_sqllab_view": dataset.get("is_sqllab_view", False),
+                "created_on": dataset.get("created_on"),
+                "changed_on": dataset.get("changed_on")
+            }
+
+            app_logger.info(f"[get_dataset_detail][Exit] Got dataset {dataset_id} with {len(column_info)} columns and {len(linked_dashboards)} linked dashboards")
+            return result
+    # [/DEF:get_dataset_detail:Function]
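The branch above normalizes two response shapes that different Superset versions return for related_objects; a condensed, self-contained version of that normalization (both payloads are illustrative):

    old_shape = {"dashboards": [{"id": 13, "dashboard_title": "Sales", "slug": "sales"}]}
    new_shape = {"result": {"dashboards": [{"id": 13, "dashboard_title": "Sales"}]}}

    def extract_dashboards(related_objects: dict) -> list:
        # Top-level key first, then the 'result' wrapper, else nothing.
        if "dashboards" in related_objects:
            return related_objects["dashboards"]
        result = related_objects.get("result")
        if isinstance(result, dict):
            return result.get("dashboards", [])
        return []

    assert len(extract_dashboards(old_shape)) == len(extract_dashboards(new_shape)) == 1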

     # [DEF:get_dataset:Function]
     # @PURPOSE: Fetches information about a specific dataset by its ID.
     # @PARAM: dataset_id (int) - The dataset ID.

@@ -42,6 +42,8 @@ def suggest_mappings(source_databases: List[Dict], target_databases: List[Dict],
         name, score, index = match
         if score >= threshold:
             suggestions.append({
                 "source_db": s_db['database_name'],
                 "target_db": target_databases[index]['database_name'],
+                "source_db_uuid": s_db['uuid'],
+                "target_db_uuid": target_databases[index]['uuid'],
                 "confidence": score / 100.0

@@ -118,14 +118,41 @@ class APIClient:
     def _init_session(self) -> requests.Session:
         with belief_scope("_init_session"):
             session = requests.Session()

+            # Create a custom adapter that handles TLS issues
+            class TLSAdapter(HTTPAdapter):
+                def init_poolmanager(self, connections, maxsize, block=False):
+                    from urllib3.poolmanager import PoolManager
+                    import ssl
+
+                    # Create an SSL context that ignores TLSv1 unrecognized name errors
+                    ctx = ssl.create_default_context()
+                    ctx.set_ciphers('HIGH:!aNULL:!eNULL:!EXPORT:!DES:!RC4:!MD5:!PSK:!SRP:!CAMELLIA')
+
+                    # Ignore TLSV1_UNRECOGNIZED_NAME errors by disabling hostname verification
+                    # This is safe when verify_ssl is false (we're already not verifying the certificate)
+                    ctx.check_hostname = False
+
+                    self.poolmanager = PoolManager(
+                        num_pools=connections,
+                        maxsize=maxsize,
+                        block=block,
+                        ssl_context=ctx
+                    )
+
             retries = Retry(total=3, backoff_factor=0.5, status_forcelist=[500, 502, 503, 504])
-            adapter = HTTPAdapter(max_retries=retries)
+            adapter = TLSAdapter(max_retries=retries)
             session.mount('http://', adapter)
             session.mount('https://', adapter)

             if not self.request_settings["verify_ssl"]:
                 urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
                 app_logger.warning("[_init_session][State] SSL verification disabled.")
-                session.verify = self.request_settings["verify_ssl"]
+                # When verify_ssl is false, we should also disable hostname verification
+                session.verify = False
+            else:
+                session.verify = True

             return session
     # [/DEF:_init_session:Function]
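Overriding HTTPAdapter.init_poolmanager is the standard requests hook for injecting a custom ssl.SSLContext: every pooled HTTPS connection made through the mounted session then uses that context. A minimal standalone form of the same pattern (the explicit CERT_NONE line is an addition here, only sensible alongside verify_ssl=False):

    import ssl
    import requests
    from requests.adapters import HTTPAdapter
    from urllib3.poolmanager import PoolManager

    class ContextAdapter(HTTPAdapter):
        # Routes all pooled connections through a caller-supplied SSLContext.
        def __init__(self, ctx, **kwargs):
            self._ctx = ctx
            super().__init__(**kwargs)

        def init_poolmanager(self, connections, maxsize, block=False):
            self.poolmanager = PoolManager(
                num_pools=connections, maxsize=maxsize, block=block,
                ssl_context=self._ctx,
            )

    ctx = ssl.create_default_context()
    ctx.check_hostname = False        # mirrors the commit's relaxation
    ctx.verify_mode = ssl.CERT_NONE   # assumed pairing when verification is off
    session = requests.Session()
    session.mount("https://", ContextAdapter(ctx))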

@@ -1,8 +1,8 @@
 # [DEF:Dependencies:Module]
 # @SEMANTICS: dependency, injection, singleton, factory, auth, jwt
-# @PURPOSE: Manages the creation and provision of shared application dependencies, such as the PluginLoader and TaskManager, to avoid circular imports.
+# @PURPOSE: Manages creation and provision of shared application dependencies, such as PluginLoader and TaskManager, to avoid circular imports.
 # @LAYER: Core
-# @RELATION: Used by the main app and API routers to get access to shared instances.
+# @RELATION: Used by main app and API routers to get access to shared instances.

 from pathlib import Path
 from fastapi import Depends, HTTPException, status

@@ -13,6 +13,7 @@ from .core.task_manager import TaskManager
 from .core.config_manager import ConfigManager
 from .core.scheduler import SchedulerService
 from .services.resource_service import ResourceService
+from .services.mapping_service import MappingService
 from .core.database import init_db, get_auth_db
 from .core.logger import logger
 from .core.auth.jwt import decode_token

@@ -29,12 +30,12 @@ config_manager = ConfigManager(config_path=str(config_path))
 init_db()

 # [DEF:get_config_manager:Function]
-# @PURPOSE: Dependency injector for the ConfigManager.
+# @PURPOSE: Dependency injector for ConfigManager.
 # @PRE: Global config_manager must be initialized.
 # @POST: Returns shared ConfigManager instance.
 # @RETURN: ConfigManager - The shared config manager instance.
 def get_config_manager() -> ConfigManager:
-    """Dependency injector for the ConfigManager."""
+    """Dependency injector for ConfigManager."""
     return config_manager
 # [/DEF:get_config_manager:Function]

@@ -54,54 +55,64 @@ resource_service = ResourceService()
 logger.info("ResourceService initialized")

 # [DEF:get_plugin_loader:Function]
-# @PURPOSE: Dependency injector for the PluginLoader.
+# @PURPOSE: Dependency injector for PluginLoader.
 # @PRE: Global plugin_loader must be initialized.
 # @POST: Returns shared PluginLoader instance.
 # @RETURN: PluginLoader - The shared plugin loader instance.
 def get_plugin_loader() -> PluginLoader:
-    """Dependency injector for the PluginLoader."""
+    """Dependency injector for PluginLoader."""
     return plugin_loader
 # [/DEF:get_plugin_loader:Function]

 # [DEF:get_task_manager:Function]
-# @PURPOSE: Dependency injector for the TaskManager.
+# @PURPOSE: Dependency injector for TaskManager.
 # @PRE: Global task_manager must be initialized.
 # @POST: Returns shared TaskManager instance.
 # @RETURN: TaskManager - The shared task manager instance.
 def get_task_manager() -> TaskManager:
-    """Dependency injector for the TaskManager."""
+    """Dependency injector for TaskManager."""
     return task_manager
 # [/DEF:get_task_manager:Function]

 # [DEF:get_scheduler_service:Function]
-# @PURPOSE: Dependency injector for the SchedulerService.
+# @PURPOSE: Dependency injector for SchedulerService.
 # @PRE: Global scheduler_service must be initialized.
 # @POST: Returns shared SchedulerService instance.
 # @RETURN: SchedulerService - The shared scheduler service instance.
 def get_scheduler_service() -> SchedulerService:
-    """Dependency injector for the SchedulerService."""
+    """Dependency injector for SchedulerService."""
     return scheduler_service
 # [/DEF:get_scheduler_service:Function]

 # [DEF:get_resource_service:Function]
-# @PURPOSE: Dependency injector for the ResourceService.
+# @PURPOSE: Dependency injector for ResourceService.
 # @PRE: Global resource_service must be initialized.
 # @POST: Returns shared ResourceService instance.
 # @RETURN: ResourceService - The shared resource service instance.
 def get_resource_service() -> ResourceService:
-    """Dependency injector for the ResourceService."""
+    """Dependency injector for ResourceService."""
     return resource_service
 # [/DEF:get_resource_service:Function]

+# [DEF:get_mapping_service:Function]
+# @PURPOSE: Dependency injector for MappingService.
+# @PRE: Global config_manager must be initialized.
+# @POST: Returns new MappingService instance.
+# @RETURN: MappingService - A new mapping service instance.
+def get_mapping_service() -> MappingService:
+    """Dependency injector for MappingService."""
+    return MappingService(config_manager)
+# [/DEF:get_mapping_service:Function]
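Unlike the singletons above, this injector builds a fresh MappingService per request, so it always sees the current config. Used in a route it follows the usual Depends pattern; a hypothetical example (not a route in this commit):

    from fastapi import APIRouter, Depends

    router = APIRouter()

    @router.get("/api/example/db-mappings")   # illustrative path
    async def example(mapping_service=Depends(get_mapping_service)):
        return await mapping_service.get_suggestions("dev", "prod")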

 # [DEF:oauth2_scheme:Variable]
 # @PURPOSE: OAuth2 password bearer scheme for token extraction.
 oauth2_scheme = OAuth2PasswordBearer(tokenUrl="/api/auth/login")
 # [/DEF:oauth2_scheme:Variable]

 # [DEF:get_current_user:Function]
-# @PURPOSE: Dependency for retrieving the currently authenticated user from a JWT.
+# @PURPOSE: Dependency for retrieving currently authenticated user from a JWT.
 # @PRE: JWT token provided in Authorization header.
-# @POST: Returns the User object if token is valid.
+# @POST: Returns User object if token is valid.
 # @THROW: HTTPException 401 if token is invalid or user not found.
 # @PARAM: token (str) - Extracted JWT token.
 # @PARAM: db (Session) - Auth database session.

@@ -157,4 +168,4 @@ def has_permission(resource: str, action: str):
     return permission_checker
 # [/DEF:has_permission:Function]

-# [/DEF:Dependencies:Module]
+# [/DEF:Dependencies:Module]

backend/src/scripts/test_dataset_dashboard_relations.py (new file, 163 lines)
@@ -0,0 +1,163 @@
+#!/usr/bin/env python3
+"""
+Script to test dataset-to-dashboard relationships from Superset API.
+
+Usage:
+    cd backend && .venv/bin/python3 src/scripts/test_dataset_dashboard_relations.py
+"""
+
+import json
+import sys
+from pathlib import Path
+
+# Add src to path (parent of scripts directory)
+sys.path.append(str(Path(__file__).parent.parent.parent))
+
+from src.core.superset_client import SupersetClient
+from src.core.config_manager import ConfigManager
+from src.core.logger import logger
+
+
+def test_dashboard_dataset_relations():
+    """Test fetching dataset-to-dashboard relationships."""
+
+    # Load environment from existing config
+    config_manager = ConfigManager()
+    environments = config_manager.get_environments()
+
+    if not environments:
+        logger.error("No environments configured!")
+        return
+
+    # Use first available environment
+    env = environments[0]
+    logger.info(f"Using environment: {env.name} ({env.url})")
+
+    client = SupersetClient(env)
+
+    try:
+        # Authenticate
+        logger.info("Authenticating to Superset...")
+        client.authenticate()
+        logger.info("Authentication successful!")
+
+        # Test dashboard ID 13
+        dashboard_id = 13
+        logger.info(f"\n=== Fetching Dashboard {dashboard_id} ===")
+        dashboard = client.network.request(method="GET", endpoint=f"/dashboard/{dashboard_id}")
+
+        print("\nDashboard structure:")
+        print(f"  ID: {dashboard.get('id')}")
+        print(f"  Title: {dashboard.get('dashboard_title')}")
+        print(f"  Published: {dashboard.get('published')}")
+
+        # Check for slices/charts
+        if 'slices' in dashboard:
+            logger.info(f"\n  Found {len(dashboard['slices'])} slices/charts in dashboard")
+            for i, slice_data in enumerate(dashboard['slices'][:5]):  # Show first 5
+                print(f"  Slice {i+1}:")
+                print(f"    ID: {slice_data.get('slice_id')}")
+                print(f"    Name: {slice_data.get('slice_name')}")
+                # Check for datasource_id
+                if 'datasource_id' in slice_data:
+                    print(f"    Datasource ID: {slice_data['datasource_id']}")
+                if 'datasource_name' in slice_data:
+                    print(f"    Datasource Name: {slice_data['datasource_name']}")
+                if 'datasource_type' in slice_data:
+                    print(f"    Datasource Type: {slice_data['datasource_type']}")
+        else:
+            logger.warning("  No 'slices' field found in dashboard response")
+            logger.info(f"  Available fields: {list(dashboard.keys())}")
+
+        # Test dataset ID 26
+        dataset_id = 26
+        logger.info(f"\n=== Fetching Dataset {dataset_id} ===")
+        dataset = client.get_dataset(dataset_id)
+
+        print("\nDataset structure:")
+        print(f"  ID: {dataset.get('id')}")
+        print(f"  Table Name: {dataset.get('table_name')}")
+        print(f"  Schema: {dataset.get('schema')}")
+        print(f"  Database: {dataset.get('database', {}).get('database_name', 'Unknown')}")
+
+        # Check for dashboards that use this dataset
+        logger.info(f"\n=== Finding Dashboards using Dataset {dataset_id} ===")
+
+        # Method: Use Superset's related_objects API
+        try:
+            logger.info(f"  Using /api/v1/dataset/{dataset_id}/related_objects endpoint...")
+            related_objects = client.network.request(
+                method="GET",
+                endpoint=f"/dataset/{dataset_id}/related_objects"
+            )
+
+            logger.info(f"  Related objects response type: {type(related_objects)}")
+            logger.info(f"  Related objects keys: {list(related_objects.keys()) if isinstance(related_objects, dict) else 'N/A'}")
+
+            # Check for dashboards in related objects
+            if 'dashboards' in related_objects:
+                dashboards = related_objects['dashboards']
+                logger.info(f"  Found {len(dashboards)} dashboards using this dataset:")
+
+                for dash in dashboards:
+                    logger.info(f"    - Dashboard ID {dash.get('id')}: {dash.get('dashboard_title', dash.get('title', 'Unknown'))}")
+            elif 'result' in related_objects:
+                # Some Superset versions use 'result' wrapper
+                result = related_objects['result']
+                if 'dashboards' in result:
+                    dashboards = result['dashboards']
+                    logger.info(f"  Found {len(dashboards)} dashboards using this dataset:")
+
+                    for dash in dashboards:
+                        logger.info(f"    - Dashboard ID {dash.get('id')}: {dash.get('dashboard_title', dash.get('title', 'Unknown'))}")
+                else:
+                    logger.warning(f"  No 'dashboards' key in result. Keys: {list(result.keys())}")
+            else:
+                logger.warning(f"  No 'dashboards' key in response. Available keys: {list(related_objects.keys())}")
+                logger.info("  Full related_objects response:")
+                print(json.dumps(related_objects, indent=2, default=str)[:1000])
+
+        except Exception as e:
+            logger.error(f"  Error fetching related objects: {e}")
+            import traceback
+            traceback.print_exc()
+
+        # Method 2: Try to use the position_json from dashboard
+        logger.info("\n=== Analyzing Dashboard Position JSON ===")
+        if 'position_json' in dashboard:
+            position_data = json.loads(dashboard['position_json'])
+            logger.info(f"  Position data type: {type(position_data)}")
+
+            # Look for datasource references
+            datasource_ids = set()
+            if isinstance(position_data, dict):
+                for key, value in position_data.items():
+                    if 'datasource' in key.lower() or key == 'DASHBOARD_VERSION_KEY':
+                        logger.debug(f"  Key: {key}, Value type: {type(value)}")
+            elif isinstance(position_data, list):
+                logger.info(f"  Position data has {len(position_data)} items")
+                for item in position_data[:3]:  # Show first 3
+                    logger.debug(f"  Item: {type(item)}, keys: {list(item.keys()) if isinstance(item, dict) else 'N/A'}")
+                    if isinstance(item, dict):
+                        if 'datasource_id' in item:
+                            datasource_ids.add(item['datasource_id'])
+
+            if datasource_ids:
+                logger.info(f"  Found datasource IDs: {datasource_ids}")
+
+        # Save full response for analysis
+        output_file = Path(__file__).parent / "dataset_dashboard_analysis.json"
+        with open(output_file, 'w') as f:
+            json.dump({
+                'dashboard': dashboard,
+                'dataset': dataset
+            }, f, indent=2, default=str)
+        logger.info(f"\nFull response saved to: {output_file}")
+
+    except Exception as e:
+        logger.error(f"Error: {e}", exc_info=True)
+        raise
+
+
+if __name__ == "__main__":
+    test_dashboard_dataset_relations()