This commit is contained in:
2026-02-15 11:11:30 +03:00
parent 4a0273a604
commit 026239e3bf
20 changed files with 60656 additions and 58958 deletions

View File

@@ -12,9 +12,9 @@
# [SECTION: IMPORTS]
from fastapi import APIRouter, Depends, HTTPException
from typing import List, Optional
from typing import List, Optional, Dict
from pydantic import BaseModel, Field
from ...dependencies import get_config_manager, get_task_manager, get_resource_service, has_permission
from ...dependencies import get_config_manager, get_task_manager, get_resource_service, get_mapping_service, has_permission
from ...core.logger import logger, belief_scope
# [/SECTION]
@@ -47,26 +47,44 @@ class DashboardItem(BaseModel):
class DashboardsResponse(BaseModel):
dashboards: List[DashboardItem]
total: int
page: int
page_size: int
total_pages: int
# [/DEF:DashboardsResponse:DataClass]
# [DEF:get_dashboards:Function]
# @PURPOSE: Fetch list of dashboards from a specific environment with Git status and last task status
# @PRE: env_id must be a valid environment ID
# @POST: Returns a list of dashboards with enhanced metadata
# @PRE: page must be >= 1 if provided
# @PRE: page_size must be between 1 and 100 if provided
# @POST: Returns a list of dashboards with enhanced metadata and pagination info
# @POST: Response includes pagination metadata (page, page_size, total, total_pages)
# @PARAM: env_id (str) - The environment ID to fetch dashboards from
# @PARAM: search (Optional[str]) - Filter by title/slug
# @PARAM: page (Optional[int]) - Page number (default: 1)
# @PARAM: page_size (Optional[int]) - Items per page (default: 10, max: 100)
# @RETURN: DashboardsResponse - List of dashboards with status metadata
# @RELATION: CALLS -> ResourceService.get_dashboards_with_status
@router.get("/api/dashboards", response_model=DashboardsResponse)
async def get_dashboards(
env_id: str,
search: Optional[str] = None,
page: int = 1,
page_size: int = 10,
config_manager=Depends(get_config_manager),
task_manager=Depends(get_task_manager),
resource_service=Depends(get_resource_service),
_ = Depends(has_permission("plugin:migration", "READ"))
):
with belief_scope("get_dashboards", f"env_id={env_id}, search={search}"):
with belief_scope("get_dashboards", f"env_id={env_id}, search={search}, page={page}, page_size={page_size}"):
# Validate pagination parameters
if page < 1:
logger.error(f"[get_dashboards][Coherence:Failed] Invalid page: {page}")
raise HTTPException(status_code=400, detail="Page must be >= 1")
if page_size < 1 or page_size > 100:
logger.error(f"[get_dashboards][Coherence:Failed] Invalid page_size: {page_size}")
raise HTTPException(status_code=400, detail="Page size must be between 1 and 100")
# Validate environment exists
environments = config_manager.get_environments()
env = next((e for e in environments if e.id == env_id), None)
@@ -90,11 +108,23 @@ async def get_dashboards(
or search_lower in d.get('slug', '').lower()
]
logger.info(f"[get_dashboards][Coherence:OK] Returning {len(dashboards)} dashboards")
# Calculate pagination
total = len(dashboards)
total_pages = (total + page_size - 1) // page_size if total > 0 else 1
start_idx = (page - 1) * page_size
end_idx = start_idx + page_size
# Slice dashboards for current page
paginated_dashboards = dashboards[start_idx:end_idx]
logger.info(f"[get_dashboards][Coherence:OK] Returning {len(paginated_dashboards)} dashboards (page {page}/{total_pages}, total: {total})")
return DashboardsResponse(
dashboards=dashboards,
total=len(dashboards)
dashboards=paginated_dashboards,
total=total,
page=page,
page_size=page_size,
total_pages=total_pages
)
except Exception as e:
@@ -102,4 +132,192 @@ async def get_dashboards(
raise HTTPException(status_code=503, detail=f"Failed to fetch dashboards: {str(e)}")
# [/DEF:get_dashboards:Function]
# [DEF:MigrateRequest:DataClass]
class MigrateRequest(BaseModel):
source_env_id: str = Field(..., description="Source environment ID")
target_env_id: str = Field(..., description="Target environment ID")
dashboard_ids: List[int] = Field(..., description="List of dashboard IDs to migrate")
db_mappings: Optional[Dict[str, str]] = Field(None, description="Database mappings for migration")
replace_db_config: bool = Field(False, description="Replace database configuration")
# [/DEF:MigrateRequest:DataClass]
# [DEF:TaskResponse:DataClass]
class TaskResponse(BaseModel):
task_id: str
# [/DEF:TaskResponse:DataClass]
# [DEF:migrate_dashboards:Function]
# @PURPOSE: Trigger bulk migration of dashboards from source to target environment
# @PRE: User has permission plugin:migration:execute
# @PRE: source_env_id and target_env_id are valid environment IDs
# @PRE: dashboard_ids is a non-empty list
# @POST: Returns task_id for tracking migration progress
# @POST: Task is created and queued for execution
# @PARAM: request (MigrateRequest) - Migration request with source, target, and dashboard IDs
# @RETURN: TaskResponse - Task ID for tracking
# @RELATION: DISPATCHES -> MigrationPlugin
# @RELATION: CALLS -> task_manager.create_task
@router.post("/api/dashboards/migrate", response_model=TaskResponse)
async def migrate_dashboards(
request: MigrateRequest,
config_manager=Depends(get_config_manager),
task_manager=Depends(get_task_manager),
_ = Depends(has_permission("plugin:migration", "EXECUTE"))
):
with belief_scope("migrate_dashboards", f"source={request.source_env_id}, target={request.target_env_id}, count={len(request.dashboard_ids)}"):
# Validate request
if not request.dashboard_ids:
logger.error("[migrate_dashboards][Coherence:Failed] No dashboard IDs provided")
raise HTTPException(status_code=400, detail="At least one dashboard ID must be provided")
# Validate environments exist
environments = config_manager.get_environments()
source_env = next((e for e in environments if e.id == request.source_env_id), None)
target_env = next((e for e in environments if e.id == request.target_env_id), None)
if not source_env:
logger.error(f"[migrate_dashboards][Coherence:Failed] Source environment not found: {request.source_env_id}")
raise HTTPException(status_code=404, detail="Source environment not found")
if not target_env:
logger.error(f"[migrate_dashboards][Coherence:Failed] Target environment not found: {request.target_env_id}")
raise HTTPException(status_code=404, detail="Target environment not found")
try:
# Create migration task
task_params = {
'source_env_id': request.source_env_id,
'target_env_id': request.target_env_id,
'dashboards': request.dashboard_ids,
'replace_db_config': request.replace_db_config,
'db_mappings': request.db_mappings or {}
}
task_id = await task_manager.create_task(
plugin_id='superset-migration',
params=task_params
)
logger.info(f"[migrate_dashboards][Coherence:OK] Migration task created: {task_id} for {len(request.dashboard_ids)} dashboards")
return TaskResponse(task_id=str(task_id))
except Exception as e:
logger.error(f"[migrate_dashboards][Coherence:Failed] Failed to create migration task: {e}")
raise HTTPException(status_code=503, detail=f"Failed to create migration task: {str(e)}")
# [/DEF:migrate_dashboards:Function]
# [DEF:BackupRequest:DataClass]
class BackupRequest(BaseModel):
env_id: str = Field(..., description="Environment ID")
dashboard_ids: List[int] = Field(..., description="List of dashboard IDs to backup")
schedule: Optional[str] = Field(None, description="Cron schedule for recurring backups (e.g., '0 0 * * *')")
# [/DEF:BackupRequest:DataClass]
# [DEF:backup_dashboards:Function]
# @PURPOSE: Trigger bulk backup of dashboards with optional cron schedule
# @PRE: User has permission plugin:backup:execute
# @PRE: env_id is a valid environment ID
# @PRE: dashboard_ids is a non-empty list
# @POST: Returns task_id for tracking backup progress
# @POST: Task is created and queued for execution
# @POST: If schedule is provided, a scheduled task is created
# @PARAM: request (BackupRequest) - Backup request with environment and dashboard IDs
# @RETURN: TaskResponse - Task ID for tracking
# @RELATION: DISPATCHES -> BackupPlugin
# @RELATION: CALLS -> task_manager.create_task
@router.post("/api/dashboards/backup", response_model=TaskResponse)
async def backup_dashboards(
request: BackupRequest,
config_manager=Depends(get_config_manager),
task_manager=Depends(get_task_manager),
_ = Depends(has_permission("plugin:backup", "EXECUTE"))
):
with belief_scope("backup_dashboards", f"env={request.env_id}, count={len(request.dashboard_ids)}, schedule={request.schedule}"):
# Validate request
if not request.dashboard_ids:
logger.error("[backup_dashboards][Coherence:Failed] No dashboard IDs provided")
raise HTTPException(status_code=400, detail="At least one dashboard ID must be provided")
# Validate environment exists
environments = config_manager.get_environments()
env = next((e for e in environments if e.id == request.env_id), None)
if not env:
logger.error(f"[backup_dashboards][Coherence:Failed] Environment not found: {request.env_id}")
raise HTTPException(status_code=404, detail="Environment not found")
try:
# Create backup task
task_params = {
'env': request.env_id,
'dashboards': request.dashboard_ids,
'schedule': request.schedule
}
task_id = await task_manager.create_task(
plugin_id='superset-backup',
params=task_params
)
logger.info(f"[backup_dashboards][Coherence:OK] Backup task created: {task_id} for {len(request.dashboard_ids)} dashboards")
return TaskResponse(task_id=str(task_id))
except Exception as e:
logger.error(f"[backup_dashboards][Coherence:Failed] Failed to create backup task: {e}")
raise HTTPException(status_code=503, detail=f"Failed to create backup task: {str(e)}")
# [/DEF:backup_dashboards:Function]
# [DEF:DatabaseMapping:DataClass]
class DatabaseMapping(BaseModel):
source_db: str
target_db: str
confidence: float
# [/DEF:DatabaseMapping:DataClass]
# [DEF:DatabaseMappingsResponse:DataClass]
class DatabaseMappingsResponse(BaseModel):
mappings: List[DatabaseMapping]
# [/DEF:DatabaseMappingsResponse:DataClass]
# [DEF:get_database_mappings:Function]
# @PURPOSE: Get database mapping suggestions between source and target environments
# @PRE: User has permission plugin:migration:read
# @PRE: source_env_id and target_env_id are valid environment IDs
# @POST: Returns list of suggested database mappings with confidence scores
# @PARAM: source_env_id (str) - Source environment ID
# @PARAM: target_env_id (str) - Target environment ID
# @RETURN: DatabaseMappingsResponse - List of suggested mappings
# @RELATION: CALLS -> MappingService.get_suggestions
@router.get("/api/dashboards/db-mappings", response_model=DatabaseMappingsResponse)
async def get_database_mappings(
source_env_id: str,
target_env_id: str,
mapping_service=Depends(get_mapping_service),
_ = Depends(has_permission("plugin:migration", "READ"))
):
with belief_scope("get_database_mappings", f"source={source_env_id}, target={target_env_id}"):
try:
# Get mapping suggestions using MappingService
suggestions = await mapping_service.get_suggestions(source_env_id, target_env_id)
# Format suggestions as DatabaseMapping objects
mappings = [
DatabaseMapping(
source_db=s.get('source_db', ''),
target_db=s.get('target_db', ''),
confidence=s.get('confidence', 0.0)
)
for s in suggestions
]
logger.info(f"[get_database_mappings][Coherence:OK] Returning {len(mappings)} database mapping suggestions")
return DatabaseMappingsResponse(mappings=mappings)
except Exception as e:
logger.error(f"[get_database_mappings][Coherence:Failed] Failed to get database mappings: {e}")
raise HTTPException(status_code=503, detail=f"Failed to get database mappings: {str(e)}")
# [/DEF:get_database_mappings:Function]
# [/DEF:backend.src.api.routes.dashboards:Module]

View File

@@ -16,6 +16,7 @@ from typing import List, Optional
from pydantic import BaseModel, Field
from ...dependencies import get_config_manager, get_task_manager, get_resource_service, has_permission
from ...core.logger import logger, belief_scope
from ...core.superset_client import SupersetClient
# [/SECTION]
router = APIRouter()
@@ -42,22 +43,64 @@ class DatasetItem(BaseModel):
last_task: Optional[LastTask] = None
# [/DEF:DatasetItem:DataClass]
# [DEF:LinkedDashboard:DataClass]
class LinkedDashboard(BaseModel):
id: int
title: str
slug: Optional[str] = None
# [/DEF:LinkedDashboard:DataClass]
# [DEF:DatasetColumn:DataClass]
class DatasetColumn(BaseModel):
id: int
name: str
type: Optional[str] = None
is_dttm: bool = False
is_active: bool = True
description: Optional[str] = None
# [/DEF:DatasetColumn:DataClass]
# [DEF:DatasetDetailResponse:DataClass]
class DatasetDetailResponse(BaseModel):
id: int
table_name: str
schema: str
database: str
description: Optional[str] = None
columns: List[DatasetColumn]
column_count: int
sql: Optional[str] = None
linked_dashboards: List[LinkedDashboard]
linked_dashboard_count: int
is_sqllab_view: bool = False
created_on: Optional[str] = None
changed_on: Optional[str] = None
# [/DEF:DatasetDetailResponse:DataClass]
# [DEF:DatasetsResponse:DataClass]
class DatasetsResponse(BaseModel):
datasets: List[DatasetItem]
total: int
page: int
page_size: int
total_pages: int
# [/DEF:DatasetsResponse:DataClass]
# [DEF:get_datasets:Function]
# @PURPOSE: Fetch list of datasets from a specific environment with mapping progress
# @PRE: env_id must be a valid environment ID
# @POST: Returns a list of datasets with enhanced metadata
# @PARAM: env_id (str) - The environment ID to fetch datasets from
# @PARAM: search (Optional[str]) - Filter by table name
# @RETURN: DatasetsResponse - List of datasets with status metadata
# [DEF:TaskResponse:DataClass]
class TaskResponse(BaseModel):
task_id: str
# [/DEF:TaskResponse:DataClass]
# [DEF:get_dataset_ids:Function]
# @PURPOSE: Fetch list of all dataset IDs from a specific environment (without pagination)
# @PRE: env_id must be a valid environment ID
# @POST: Returns a list of all dataset IDs
# @PARAM: env_id (str) - The environment ID to fetch datasets from
# @PARAM: search (Optional[str]) - Filter by table name
# @RETURN: List[int] - List of dataset IDs
# @RELATION: CALLS -> ResourceService.get_datasets_with_status
@router.get("/api/datasets", response_model=DatasetsResponse)
async def get_datasets(
@router.get("/api/datasets/ids")
async def get_dataset_ids(
env_id: str,
search: Optional[str] = None,
config_manager=Depends(get_config_manager),
@@ -65,7 +108,73 @@ async def get_datasets(
resource_service=Depends(get_resource_service),
_ = Depends(has_permission("plugin:migration", "READ"))
):
with belief_scope("get_datasets", f"env_id={env_id}, search={search}"):
with belief_scope("get_dataset_ids", f"env_id={env_id}, search={search}"):
# Validate environment exists
environments = config_manager.get_environments()
env = next((e for e in environments if e.id == env_id), None)
if not env:
logger.error(f"[get_dataset_ids][Coherence:Failed] Environment not found: {env_id}")
raise HTTPException(status_code=404, detail="Environment not found")
try:
# Get all tasks for status lookup
all_tasks = task_manager.get_all_tasks()
# Fetch datasets with status using ResourceService
datasets = await resource_service.get_datasets_with_status(env, all_tasks)
# Apply search filter if provided
if search:
search_lower = search.lower()
datasets = [
d for d in datasets
if search_lower in d.get('table_name', '').lower()
]
# Extract and return just the IDs
dataset_ids = [d['id'] for d in datasets]
logger.info(f"[get_dataset_ids][Coherence:OK] Returning {len(dataset_ids)} dataset IDs")
return {"dataset_ids": dataset_ids}
except Exception as e:
logger.error(f"[get_dataset_ids][Coherence:Failed] Failed to fetch dataset IDs: {e}")
raise HTTPException(status_code=503, detail=f"Failed to fetch dataset IDs: {str(e)}")
# [/DEF:get_dataset_ids:Function]
# [DEF:get_datasets:Function]
# @PURPOSE: Fetch list of datasets from a specific environment with mapping progress
# @PRE: env_id must be a valid environment ID
# @PRE: page must be >= 1 if provided
# @PRE: page_size must be between 1 and 100 if provided
# @POST: Returns a list of datasets with enhanced metadata and pagination info
# @POST: Response includes pagination metadata (page, page_size, total, total_pages)
# @PARAM: env_id (str) - The environment ID to fetch datasets from
# @PARAM: search (Optional[str]) - Filter by table name
# @PARAM: page (Optional[int]) - Page number (default: 1)
# @PARAM: page_size (Optional[int]) - Items per page (default: 10, max: 100)
# @RETURN: DatasetsResponse - List of datasets with status metadata
# @RELATION: CALLS -> ResourceService.get_datasets_with_status
@router.get("/api/datasets", response_model=DatasetsResponse)
async def get_datasets(
env_id: str,
search: Optional[str] = None,
page: int = 1,
page_size: int = 10,
config_manager=Depends(get_config_manager),
task_manager=Depends(get_task_manager),
resource_service=Depends(get_resource_service),
_ = Depends(has_permission("plugin:migration", "READ"))
):
with belief_scope("get_datasets", f"env_id={env_id}, search={search}, page={page}, page_size={page_size}"):
# Validate pagination parameters
if page < 1:
logger.error(f"[get_datasets][Coherence:Failed] Invalid page: {page}")
raise HTTPException(status_code=400, detail="Page must be >= 1")
if page_size < 1 or page_size > 100:
logger.error(f"[get_datasets][Coherence:Failed] Invalid page_size: {page_size}")
raise HTTPException(status_code=400, detail="Page size must be between 1 and 100")
# Validate environment exists
environments = config_manager.get_environments()
env = next((e for e in environments if e.id == env_id), None)
@@ -88,11 +197,23 @@ async def get_datasets(
if search_lower in d.get('table_name', '').lower()
]
logger.info(f"[get_datasets][Coherence:OK] Returning {len(datasets)} datasets")
# Calculate pagination
total = len(datasets)
total_pages = (total + page_size - 1) // page_size if total > 0 else 1
start_idx = (page - 1) * page_size
end_idx = start_idx + page_size
# Slice datasets for current page
paginated_datasets = datasets[start_idx:end_idx]
logger.info(f"[get_datasets][Coherence:OK] Returning {len(paginated_datasets)} datasets (page {page}/{total_pages}, total: {total})")
return DatasetsResponse(
datasets=datasets,
total=len(datasets)
datasets=paginated_datasets,
total=total,
page=page,
page_size=page_size,
total_pages=total_pages
)
except Exception as e:
@@ -100,4 +221,175 @@ async def get_datasets(
raise HTTPException(status_code=503, detail=f"Failed to fetch datasets: {str(e)}")
# [/DEF:get_datasets:Function]
# [DEF:MapColumnsRequest:DataClass]
class MapColumnsRequest(BaseModel):
env_id: str = Field(..., description="Environment ID")
dataset_ids: List[int] = Field(..., description="List of dataset IDs to map")
source_type: str = Field(..., description="Source type: 'postgresql' or 'xlsx'")
connection_id: Optional[str] = Field(None, description="Connection ID for PostgreSQL source")
file_data: Optional[str] = Field(None, description="File path or data for XLSX source")
# [/DEF:MapColumnsRequest:DataClass]
# [DEF:map_columns:Function]
# @PURPOSE: Trigger bulk column mapping for datasets
# @PRE: User has permission plugin:mapper:execute
# @PRE: env_id is a valid environment ID
# @PRE: dataset_ids is a non-empty list
# @POST: Returns task_id for tracking mapping progress
# @POST: Task is created and queued for execution
# @PARAM: request (MapColumnsRequest) - Mapping request with environment and dataset IDs
# @RETURN: TaskResponse - Task ID for tracking
# @RELATION: DISPATCHES -> MapperPlugin
# @RELATION: CALLS -> task_manager.create_task
@router.post("/api/datasets/map-columns", response_model=TaskResponse)
async def map_columns(
request: MapColumnsRequest,
config_manager=Depends(get_config_manager),
task_manager=Depends(get_task_manager),
_ = Depends(has_permission("plugin:mapper", "EXECUTE"))
):
with belief_scope("map_columns", f"env={request.env_id}, count={len(request.dataset_ids)}, source={request.source_type}"):
# Validate request
if not request.dataset_ids:
logger.error("[map_columns][Coherence:Failed] No dataset IDs provided")
raise HTTPException(status_code=400, detail="At least one dataset ID must be provided")
# Validate source type
if request.source_type not in ['postgresql', 'xlsx']:
logger.error(f"[map_columns][Coherence:Failed] Invalid source type: {request.source_type}")
raise HTTPException(status_code=400, detail="Source type must be 'postgresql' or 'xlsx'")
# Validate environment exists
environments = config_manager.get_environments()
env = next((e for e in environments if e.id == request.env_id), None)
if not env:
logger.error(f"[map_columns][Coherence:Failed] Environment not found: {request.env_id}")
raise HTTPException(status_code=404, detail="Environment not found")
try:
# Create mapping task
task_params = {
'env_id': request.env_id,
'datasets': request.dataset_ids,
'source_type': request.source_type,
'connection_id': request.connection_id,
'file_data': request.file_data
}
task_id = await task_manager.create_task(
plugin_id='dataset-mapper',
params=task_params
)
logger.info(f"[map_columns][Coherence:OK] Mapping task created: {task_id} for {len(request.dataset_ids)} datasets")
return TaskResponse(task_id=str(task_id))
except Exception as e:
logger.error(f"[map_columns][Coherence:Failed] Failed to create mapping task: {e}")
raise HTTPException(status_code=503, detail=f"Failed to create mapping task: {str(e)}")
# [/DEF:map_columns:Function]
# [DEF:GenerateDocsRequest:DataClass]
class GenerateDocsRequest(BaseModel):
env_id: str = Field(..., description="Environment ID")
dataset_ids: List[int] = Field(..., description="List of dataset IDs to generate docs for")
llm_provider: str = Field(..., description="LLM provider to use")
options: Optional[dict] = Field(None, description="Additional options for documentation generation")
# [/DEF:GenerateDocsRequest:DataClass]
# [DEF:generate_docs:Function]
# @PURPOSE: Trigger bulk documentation generation for datasets
# @PRE: User has permission plugin:llm_analysis:execute
# @PRE: env_id is a valid environment ID
# @PRE: dataset_ids is a non-empty list
# @POST: Returns task_id for tracking documentation generation progress
# @POST: Task is created and queued for execution
# @PARAM: request (GenerateDocsRequest) - Documentation generation request
# @RETURN: TaskResponse - Task ID for tracking
# @RELATION: DISPATCHES -> LLMAnalysisPlugin
# @RELATION: CALLS -> task_manager.create_task
@router.post("/api/datasets/generate-docs", response_model=TaskResponse)
async def generate_docs(
request: GenerateDocsRequest,
config_manager=Depends(get_config_manager),
task_manager=Depends(get_task_manager),
_ = Depends(has_permission("plugin:llm_analysis", "EXECUTE"))
):
with belief_scope("generate_docs", f"env={request.env_id}, count={len(request.dataset_ids)}, provider={request.llm_provider}"):
# Validate request
if not request.dataset_ids:
logger.error("[generate_docs][Coherence:Failed] No dataset IDs provided")
raise HTTPException(status_code=400, detail="At least one dataset ID must be provided")
# Validate environment exists
environments = config_manager.get_environments()
env = next((e for e in environments if e.id == request.env_id), None)
if not env:
logger.error(f"[generate_docs][Coherence:Failed] Environment not found: {request.env_id}")
raise HTTPException(status_code=404, detail="Environment not found")
try:
# Create documentation generation task
task_params = {
'env_id': request.env_id,
'datasets': request.dataset_ids,
'llm_provider': request.llm_provider,
'options': request.options or {}
}
task_id = await task_manager.create_task(
plugin_id='llm_documentation',
params=task_params
)
logger.info(f"[generate_docs][Coherence:OK] Documentation generation task created: {task_id} for {len(request.dataset_ids)} datasets")
return TaskResponse(task_id=str(task_id))
except Exception as e:
logger.error(f"[generate_docs][Coherence:Failed] Failed to create documentation generation task: {e}")
raise HTTPException(status_code=503, detail=f"Failed to create documentation generation task: {str(e)}")
# [/DEF:generate_docs:Function]
# [DEF:get_dataset_detail:Function]
# @PURPOSE: Get detailed dataset information including columns and linked dashboards
# @PRE: env_id is a valid environment ID
# @PRE: dataset_id is a valid dataset ID
# @POST: Returns detailed dataset info with columns and linked dashboards
# @PARAM: env_id (str) - The environment ID
# @PARAM: dataset_id (int) - The dataset ID
# @RETURN: DatasetDetailResponse - Detailed dataset information
# @RELATION: CALLS -> SupersetClient.get_dataset_detail
@router.get("/api/datasets/{dataset_id}", response_model=DatasetDetailResponse)
async def get_dataset_detail(
env_id: str,
dataset_id: int,
config_manager=Depends(get_config_manager),
_ = Depends(has_permission("plugin:migration", "READ"))
):
with belief_scope("get_dataset_detail", f"env_id={env_id}, dataset_id={dataset_id}"):
# Validate environment exists
environments = config_manager.get_environments()
env = next((e for e in environments if e.id == env_id), None)
if not env:
logger.error(f"[get_dataset_detail][Coherence:Failed] Environment not found: {env_id}")
raise HTTPException(status_code=404, detail="Environment not found")
try:
# Fetch detailed dataset info using SupersetClient
client = SupersetClient(env)
dataset_detail = client.get_dataset_detail(dataset_id)
logger.info(f"[get_dataset_detail][Coherence:OK] Retrieved dataset {dataset_id} with {dataset_detail['column_count']} columns and {dataset_detail['linked_dashboard_count']} linked dashboards")
return DatasetDetailResponse(**dataset_detail)
except Exception as e:
logger.error(f"[get_dataset_detail][Coherence:Failed] Failed to fetch dataset detail: {e}")
raise HTTPException(status_code=503, detail=f"Failed to fetch dataset detail: {str(e)}")
# [/DEF:get_dataset_detail:Function]
# [/DEF:backend.src.api.routes.datasets:Module]

View File

@@ -303,11 +303,11 @@ async def get_consolidated_settings(
config = config_manager.get_config()
return ConsolidatedSettingsResponse(
environments=config.environments,
environments=[env.dict() for env in config.environments],
connections=config.settings.connections,
llm=config.settings.llm,
logging=config.settings.logging,
storage=config.settings.storage
logging=config.settings.logging.dict(),
storage=config.settings.storage.dict()
)
# [/DEF:get_consolidated_settings:Function]

View File

@@ -115,7 +115,7 @@ app.include_router(plugins.router, prefix="/api/plugins", tags=["Plugins"])
app.include_router(tasks.router, prefix="/api/tasks", tags=["Tasks"])
app.include_router(settings.router, prefix="/api/settings", tags=["Settings"])
app.include_router(connections.router, prefix="/api/settings/connections", tags=["Connections"])
app.include_router(environments.router, prefix="/api/environments", tags=["Environments"])
app.include_router(environments.router, prefix="/api/settings/environments", tags=["Environments"])
app.include_router(mappings.router)
app.include_router(migration.router)
app.include_router(git.router)

View File

@@ -48,6 +48,8 @@ class GlobalSettings(BaseModel):
storage: StorageConfig = Field(default_factory=StorageConfig)
default_environment_id: Optional[str] = None
logging: LoggingConfig = Field(default_factory=LoggingConfig)
connections: List[dict] = []
llm: dict = Field(default_factory=lambda: {"providers": [], "default_provider": ""})
# Task retention settings
task_retention_days: int = 30

View File

@@ -236,6 +236,82 @@ class SupersetClient:
return result
# [/DEF:get_datasets_summary:Function]
# [DEF:get_dataset_detail:Function]
# @PURPOSE: Fetches detailed dataset information including columns and linked dashboards
# @PRE: Client is authenticated and dataset_id exists.
# @POST: Returns detailed dataset info with columns and linked dashboards.
# @PARAM: dataset_id (int) - The dataset ID to fetch details for.
# @RETURN: Dict - Dataset details with columns and linked_dashboards.
# @RELATION: CALLS -> self.get_dataset
# @RELATION: CALLS -> self.network.request (for related_objects)
def get_dataset_detail(self, dataset_id: int) -> Dict:
with belief_scope("SupersetClient.get_dataset_detail", f"id={dataset_id}"):
# Get base dataset info
dataset = self.get_dataset(dataset_id)
# Extract columns information
columns = dataset.get("columns", [])
column_info = []
for col in columns:
column_info.append({
"id": col.get("id"),
"name": col.get("column_name"),
"type": col.get("type"),
"is_dttm": col.get("is_dttm", False),
"is_active": col.get("is_active", True),
"description": col.get("description", "")
})
# Get linked dashboards using related_objects endpoint
linked_dashboards = []
try:
related_objects = self.network.request(
method="GET",
endpoint=f"/dataset/{dataset_id}/related_objects"
)
# Handle different response formats
if isinstance(related_objects, dict):
if "dashboards" in related_objects:
dashboards_data = related_objects["dashboards"]
elif "result" in related_objects and isinstance(related_objects["result"], dict):
dashboards_data = related_objects["result"].get("dashboards", [])
else:
dashboards_data = []
for dash in dashboards_data:
linked_dashboards.append({
"id": dash.get("id"),
"title": dash.get("dashboard_title") or dash.get("title", "Unknown"),
"slug": dash.get("slug")
})
except Exception as e:
app_logger.warning(f"[get_dataset_detail][Warning] Failed to fetch related dashboards: {e}")
linked_dashboards = []
# Extract SQL table information
sql = dataset.get("sql", "")
result = {
"id": dataset.get("id"),
"table_name": dataset.get("table_name"),
"schema": dataset.get("schema"),
"database": dataset.get("database", {}).get("database_name", "Unknown"),
"description": dataset.get("description", ""),
"columns": column_info,
"column_count": len(column_info),
"sql": sql,
"linked_dashboards": linked_dashboards,
"linked_dashboard_count": len(linked_dashboards),
"is_sqllab_view": dataset.get("is_sqllab_view", False),
"created_on": dataset.get("created_on"),
"changed_on": dataset.get("changed_on")
}
app_logger.info(f"[get_dataset_detail][Exit] Got dataset {dataset_id} with {len(column_info)} columns and {len(linked_dashboards)} linked dashboards")
return result
# [/DEF:get_dataset_detail:Function]
# [DEF:get_dataset:Function]
# @PURPOSE: Получает информацию о конкретном датасете по его ID.
# @PARAM: dataset_id (int) - ID датасета.

View File

@@ -42,6 +42,8 @@ def suggest_mappings(source_databases: List[Dict], target_databases: List[Dict],
name, score, index = match
if score >= threshold:
suggestions.append({
"source_db": s_db['database_name'],
"target_db": target_databases[index]['database_name'],
"source_db_uuid": s_db['uuid'],
"target_db_uuid": target_databases[index]['uuid'],
"confidence": score / 100.0

View File

@@ -118,14 +118,41 @@ class APIClient:
def _init_session(self) -> requests.Session:
with belief_scope("_init_session"):
session = requests.Session()
# Create a custom adapter that handles TLS issues
class TLSAdapter(HTTPAdapter):
def init_poolmanager(self, connections, maxsize, block=False):
from urllib3.poolmanager import PoolManager
import ssl
# Create an SSL context that ignores TLSv1 unrecognized name errors
ctx = ssl.create_default_context()
ctx.set_ciphers('HIGH:!aNULL:!eNULL:!EXPORT:!DES:!RC4:!MD5:!PSK:!SRP:!CAMELLIA')
# Ignore TLSV1_UNRECOGNIZED_NAME errors by disabling hostname verification
# This is safe when verify_ssl is false (we're already not verifying the certificate)
ctx.check_hostname = False
self.poolmanager = PoolManager(
num_pools=connections,
maxsize=maxsize,
block=block,
ssl_context=ctx
)
retries = Retry(total=3, backoff_factor=0.5, status_forcelist=[500, 502, 503, 504])
adapter = HTTPAdapter(max_retries=retries)
adapter = TLSAdapter(max_retries=retries)
session.mount('http://', adapter)
session.mount('https://', adapter)
if not self.request_settings["verify_ssl"]:
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
app_logger.warning("[_init_session][State] SSL verification disabled.")
session.verify = self.request_settings["verify_ssl"]
# When verify_ssl is false, we should also disable hostname verification
session.verify = False
else:
session.verify = True
return session
# [/DEF:_init_session:Function]

View File

@@ -1,8 +1,8 @@
# [DEF:Dependencies:Module]
# @SEMANTICS: dependency, injection, singleton, factory, auth, jwt
# @PURPOSE: Manages the creation and provision of shared application dependencies, such as the PluginLoader and TaskManager, to avoid circular imports.
# @PURPOSE: Manages creation and provision of shared application dependencies, such as PluginLoader and TaskManager, to avoid circular imports.
# @LAYER: Core
# @RELATION: Used by the main app and API routers to get access to shared instances.
# @RELATION: Used by main app and API routers to get access to shared instances.
from pathlib import Path
from fastapi import Depends, HTTPException, status
@@ -13,6 +13,7 @@ from .core.task_manager import TaskManager
from .core.config_manager import ConfigManager
from .core.scheduler import SchedulerService
from .services.resource_service import ResourceService
from .services.mapping_service import MappingService
from .core.database import init_db, get_auth_db
from .core.logger import logger
from .core.auth.jwt import decode_token
@@ -29,12 +30,12 @@ config_manager = ConfigManager(config_path=str(config_path))
init_db()
# [DEF:get_config_manager:Function]
# @PURPOSE: Dependency injector for the ConfigManager.
# @PURPOSE: Dependency injector for ConfigManager.
# @PRE: Global config_manager must be initialized.
# @POST: Returns shared ConfigManager instance.
# @RETURN: ConfigManager - The shared config manager instance.
def get_config_manager() -> ConfigManager:
"""Dependency injector for the ConfigManager."""
"""Dependency injector for ConfigManager."""
return config_manager
# [/DEF:get_config_manager:Function]
@@ -54,54 +55,64 @@ resource_service = ResourceService()
logger.info("ResourceService initialized")
# [DEF:get_plugin_loader:Function]
# @PURPOSE: Dependency injector for the PluginLoader.
# @PURPOSE: Dependency injector for PluginLoader.
# @PRE: Global plugin_loader must be initialized.
# @POST: Returns shared PluginLoader instance.
# @RETURN: PluginLoader - The shared plugin loader instance.
def get_plugin_loader() -> PluginLoader:
"""Dependency injector for the PluginLoader."""
"""Dependency injector for PluginLoader."""
return plugin_loader
# [/DEF:get_plugin_loader:Function]
# [DEF:get_task_manager:Function]
# @PURPOSE: Dependency injector for the TaskManager.
# @PURPOSE: Dependency injector for TaskManager.
# @PRE: Global task_manager must be initialized.
# @POST: Returns shared TaskManager instance.
# @RETURN: TaskManager - The shared task manager instance.
def get_task_manager() -> TaskManager:
"""Dependency injector for the TaskManager."""
"""Dependency injector for TaskManager."""
return task_manager
# [/DEF:get_task_manager:Function]
# [DEF:get_scheduler_service:Function]
# @PURPOSE: Dependency injector for the SchedulerService.
# @PURPOSE: Dependency injector for SchedulerService.
# @PRE: Global scheduler_service must be initialized.
# @POST: Returns shared SchedulerService instance.
# @RETURN: SchedulerService - The shared scheduler service instance.
def get_scheduler_service() -> SchedulerService:
"""Dependency injector for the SchedulerService."""
"""Dependency injector for SchedulerService."""
return scheduler_service
# [/DEF:get_scheduler_service:Function]
# [DEF:get_resource_service:Function]
# @PURPOSE: Dependency injector for the ResourceService.
# @PURPOSE: Dependency injector for ResourceService.
# @PRE: Global resource_service must be initialized.
# @POST: Returns shared ResourceService instance.
# @RETURN: ResourceService - The shared resource service instance.
def get_resource_service() -> ResourceService:
"""Dependency injector for the ResourceService."""
"""Dependency injector for ResourceService."""
return resource_service
# [/DEF:get_resource_service:Function]
# [DEF:get_mapping_service:Function]
# @PURPOSE: Dependency injector for MappingService.
# @PRE: Global config_manager must be initialized.
# @POST: Returns new MappingService instance.
# @RETURN: MappingService - A new mapping service instance.
def get_mapping_service() -> MappingService:
"""Dependency injector for MappingService."""
return MappingService(config_manager)
# [/DEF:get_mapping_service:Function]
# [DEF:oauth2_scheme:Variable]
# @PURPOSE: OAuth2 password bearer scheme for token extraction.
oauth2_scheme = OAuth2PasswordBearer(tokenUrl="/api/auth/login")
# [/DEF:oauth2_scheme:Variable]
# [DEF:get_current_user:Function]
# @PURPOSE: Dependency for retrieving the currently authenticated user from a JWT.
# @PURPOSE: Dependency for retrieving currently authenticated user from a JWT.
# @PRE: JWT token provided in Authorization header.
# @POST: Returns the User object if token is valid.
# @POST: Returns User object if token is valid.
# @THROW: HTTPException 401 if token is invalid or user not found.
# @PARAM: token (str) - Extracted JWT token.
# @PARAM: db (Session) - Auth database session.
@@ -157,4 +168,4 @@ def has_permission(resource: str, action: str):
return permission_checker
# [/DEF:has_permission:Function]
# [/DEF:Dependencies:Module]
# [/DEF:Dependencies:Module]

View File

@@ -0,0 +1,163 @@
#!/usr/bin/env python3
"""
Script to test dataset-to-dashboard relationships from Superset API.
Usage:
cd backend && .venv/bin/python3 src/scripts/test_dataset_dashboard_relations.py
"""
import json
import sys
from pathlib import Path
# Add src to path (parent of scripts directory)
sys.path.append(str(Path(__file__).parent.parent.parent))
from src.core.superset_client import SupersetClient
from src.core.config_manager import ConfigManager
from src.core.logger import logger
def test_dashboard_dataset_relations():
"""Test fetching dataset-to-dashboard relationships."""
# Load environment from existing config
config_manager = ConfigManager()
environments = config_manager.get_environments()
if not environments:
logger.error("No environments configured!")
return
# Use first available environment
env = environments[0]
logger.info(f"Using environment: {env.name} ({env.url})")
client = SupersetClient(env)
try:
# Authenticate
logger.info("Authenticating to Superset...")
client.authenticate()
logger.info("Authentication successful!")
# Test dashboard ID 13
dashboard_id = 13
logger.info(f"\n=== Fetching Dashboard {dashboard_id} ===")
dashboard = client.network.request(method="GET", endpoint=f"/dashboard/{dashboard_id}")
print("\nDashboard structure:")
print(f" ID: {dashboard.get('id')}")
print(f" Title: {dashboard.get('dashboard_title')}")
print(f" Published: {dashboard.get('published')}")
# Check for slices/charts
if 'slices' in dashboard:
logger.info(f"\n Found {len(dashboard['slices'])} slices/charts in dashboard")
for i, slice_data in enumerate(dashboard['slices'][:5]): # Show first 5
print(f" Slice {i+1}:")
print(f" ID: {slice_data.get('slice_id')}")
print(f" Name: {slice_data.get('slice_name')}")
# Check for datasource_id
if 'datasource_id' in slice_data:
print(f" Datasource ID: {slice_data['datasource_id']}")
if 'datasource_name' in slice_data:
print(f" Datasource Name: {slice_data['datasource_name']}")
if 'datasource_type' in slice_data:
print(f" Datasource Type: {slice_data['datasource_type']}")
else:
logger.warning(" No 'slices' field found in dashboard response")
logger.info(f" Available fields: {list(dashboard.keys())}")
# Test dataset ID 26
dataset_id = 26
logger.info(f"\n=== Fetching Dataset {dataset_id} ===")
dataset = client.get_dataset(dataset_id)
print("\nDataset structure:")
print(f" ID: {dataset.get('id')}")
print(f" Table Name: {dataset.get('table_name')}")
print(f" Schema: {dataset.get('schema')}")
print(f" Database: {dataset.get('database', {}).get('database_name', 'Unknown')}")
# Check for dashboards that use this dataset
logger.info(f"\n=== Finding Dashboards using Dataset {dataset_id} ===")
# Method: Use Superset's related_objects API
try:
logger.info(f" Using /api/v1/dataset/{dataset_id}/related_objects endpoint...")
related_objects = client.network.request(
method="GET",
endpoint=f"/dataset/{dataset_id}/related_objects"
)
logger.info(f" Related objects response type: {type(related_objects)}")
logger.info(f" Related objects keys: {list(related_objects.keys()) if isinstance(related_objects, dict) else 'N/A'}")
# Check for dashboards in related objects
if 'dashboards' in related_objects:
dashboards = related_objects['dashboards']
logger.info(f" Found {len(dashboards)} dashboards using this dataset:")
for dash in dashboards:
logger.info(f" - Dashboard ID {dash.get('id')}: {dash.get('dashboard_title', dash.get('title', 'Unknown'))}")
elif 'result' in related_objects:
# Some Superset versions use 'result' wrapper
result = related_objects['result']
if 'dashboards' in result:
dashboards = result['dashboards']
logger.info(f" Found {len(dashboards)} dashboards using this dataset:")
for dash in dashboards:
logger.info(f" - Dashboard ID {dash.get('id')}: {dash.get('dashboard_title', dash.get('title', 'Unknown'))}")
else:
logger.warning(f" No 'dashboards' key in result. Keys: {list(result.keys())}")
else:
logger.warning(f" No 'dashboards' key in response. Available keys: {list(related_objects.keys())}")
logger.info(f" Full related_objects response:")
print(json.dumps(related_objects, indent=2, default=str)[:1000])
except Exception as e:
logger.error(f" Error fetching related objects: {e}")
import traceback
traceback.print_exc()
# Method 2: Try to use the position_json from dashboard
logger.info(f"\n=== Analyzing Dashboard Position JSON ===")
if 'position_json' in dashboard:
position_data = json.loads(dashboard['position_json'])
logger.info(f" Position data type: {type(position_data)}")
# Look for datasource references
datasource_ids = set()
if isinstance(position_data, dict):
for key, value in position_data.items():
if 'datasource' in key.lower() or key == 'DASHBOARD_VERSION_KEY':
logger.debug(f" Key: {key}, Value type: {type(value)}")
elif isinstance(position_data, list):
logger.info(f" Position data has {len(position_data)} items")
for item in position_data[:3]: # Show first 3
logger.debug(f" Item: {type(item)}, keys: {list(item.keys()) if isinstance(item, dict) else 'N/A'}")
if isinstance(item, dict):
if 'datasource_id' in item:
datasource_ids.add(item['datasource_id'])
if datasource_ids:
logger.info(f" Found datasource IDs: {datasource_ids}")
# Save full response for analysis
output_file = Path(__file__).parent / "dataset_dashboard_analysis.json"
with open(output_file, 'w') as f:
json.dump({
'dashboard': dashboard,
'dataset': dataset
}, f, indent=2, default=str)
logger.info(f"\nFull response saved to: {output_file}")
except Exception as e:
logger.error(f"Error: {e}", exc_info=True)
raise
if __name__ == "__main__":
test_dashboard_dataset_relations()