ready for test

2026-02-25 13:35:09 +03:00
parent 21e969a769
commit 33433c3173
11 changed files with 994 additions and 351 deletions

View File: backend/src/api/routes/migration.py

@@ -6,12 +6,16 @@
# @RELATION: DEPENDS_ON -> backend.src.dependencies
# @RELATION: DEPENDS_ON -> backend.src.models.dashboard
from fastapi import APIRouter, Depends, HTTPException, Query
from typing import List, Dict, Any
from sqlalchemy.orm import Session
from ...dependencies import get_config_manager, get_task_manager, has_permission
from ...core.database import get_db
from ...models.dashboard import DashboardMetadata, DashboardSelection
from ...core.superset_client import SupersetClient
from ...core.logger import belief_scope
from ...core.mapping_service import IdMappingService
from ...models.mapping import ResourceMapping
router = APIRouter(prefix="/api", tags=["migration"])
@@ -61,9 +65,10 @@ async def execute_migration(
# Create migration task with debug logging
from ...core.logger import logger
# Include replace_db_config and fix_cross_filters in the task parameters
task_params = selection.dict()
task_params['replace_db_config'] = selection.replace_db_config
task_params['fix_cross_filters'] = selection.fix_cross_filters
logger.info(f"Creating migration task with params: {task_params}")
logger.info(f"Available environments: {env_ids}")
@@ -78,4 +83,68 @@ async def execute_migration(
raise HTTPException(status_code=500, detail=f"Failed to create migration task: {str(e)}")
# [/DEF:execute_migration:Function]
# [DEF:get_migration_settings:Function]
# @PURPOSE: Get the current migration sync cron string.
@router.get("/settings", response_model=Dict[str, str])
async def get_migration_settings(
config_manager=Depends(get_config_manager),
_ = Depends(has_permission("plugin:migration", "READ"))
):
with belief_scope("get_migration_settings"):
# For MVP simplicity the cron expression is stored in the config;
# fall back to a valid default ("0 2 * * *", daily at 02:00) if not set.
config = config_manager.get_config()
cron = config.get("migration_sync_cron", "0 2 * * *")
return {"cron": cron}
# [/DEF:get_migration_settings:Function]
# [DEF:update_migration_settings:Function]
# @PURPOSE: Update the migration sync cron string.
@router.put("/settings", response_model=Dict[str, str])
async def update_migration_settings(
payload: Dict[str, str],
config_manager=Depends(get_config_manager),
_ = Depends(has_permission("plugin:migration", "WRITE"))
):
with belief_scope("update_migration_settings"):
if "cron" not in payload:
raise HTTPException(status_code=400, detail="Missing 'cron' field in payload")
cron_expr = payload["cron"]
# Cron validation could be added here (a hedged sketch follows this function).
# A full implementation would persist the setting and reschedule the sync job;
# for the MVP we simply patch the config via config_manager and save it.
current_cfg = config_manager.get_config()
current_cfg["migration_sync_cron"] = cron_expr
config_manager.save_config(current_cfg)
return {"cron": cron_expr, "status": "updated"}
# [/DEF:update_migration_settings:Function]
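A minimal validation step could reuse APScheduler's cron parser, which this commit already depends on: CronTrigger.from_crontab raises ValueError on malformed expressions. A hedged sketch, not part of this commit:
from apscheduler.triggers.cron import CronTrigger
from fastapi import HTTPException

def validate_cron_or_400(cron_expr: str) -> None:
    # Reuse APScheduler's parser for validation; it raises ValueError on
    # malformed five-field cron expressions such as "61 * * * *".
    try:
        CronTrigger.from_crontab(cron_expr)
    except ValueError as e:
        raise HTTPException(status_code=400, detail=f"Invalid cron expression: {e}")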
# [DEF:get_resource_mappings:Function]
# @PURPOSE: Fetch all synchronized object mappings from the database.
@router.get("/mappings-data", response_model=List[Dict[str, Any]])
async def get_resource_mappings(
skip: int = Query(0, ge=0),
limit: int = Query(100, ge=1, le=1000),
db: Session = Depends(get_db),
_ = Depends(has_permission("plugin:migration", "READ"))
):
with belief_scope("get_resource_mappings"):
mappings = db.query(ResourceMapping).offset(skip).limit(limit).all()
result = []
for m in mappings:
result.append({
"id": m.id,
"environment_id": m.environment_id,
"resource_type": m.resource_type.value,
"uuid": m.uuid,
"remote_id": m.remote_integer_id,
"resource_name": m.resource_name,
"last_synced_at": m.last_synced_at.isoformat() if m.last_synced_at else None
})
return result
# [/DEF:get_resource_mappings:Function]
# [/DEF:backend.src.api.routes.migration:Module]
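A hedged usage sketch for the new mappings endpoint. The base URL is a deployment-specific assumption, and the route requires plugin:migration READ permission, so a real call would also carry auth:
import httpx

# Page through /api/mappings-data; field names match the dicts built above.
resp = httpx.get(
    "http://localhost:8000/api/mappings-data",  # assumed host/port
    params={"skip": 0, "limit": 100},
)
resp.raise_for_status()
for row in resp.json():
    print(row["resource_type"], row["uuid"], "->", row["remote_id"])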

View File: backend/src/core/mapping_service.py

@@ -0,0 +1,195 @@
# [DEF:backend.src.core.mapping_service:Module]
#
# @TIER: CRITICAL
# @SEMANTICS: mapping, ids, synchronization, environments, cross-filters
# @PURPOSE: Service for tracking and synchronizing Superset Resource IDs (UUID <-> Integer ID)
# @LAYER: Core
# @RELATION: DEPENDS_ON -> backend.src.models.mapping (ResourceMapping, ResourceType)
# @RELATION: DEPENDS_ON -> backend.src.core.logger
# @TEST_DATA: mock_superset_resources -> {'chart': [{'id': 42, 'uuid': '1234', 'slice_name': 'test'}], 'dataset': [{'id': 99, 'uuid': '5678', 'table_name': 'data'}]}
#
# @INVARIANT: sync_environment must handle remote API failures gracefully.
# [SECTION: IMPORTS]
from typing import Dict, List, Optional
from datetime import datetime, timezone
from sqlalchemy.orm import Session
from apscheduler.schedulers.background import BackgroundScheduler
from apscheduler.triggers.cron import CronTrigger
from src.models.mapping import ResourceMapping, ResourceType
from src.core.logger import logger, belief_scope
# [/SECTION]
# [DEF:IdMappingService:Class]
# @TIER: CRITICAL
# @PURPOSE: Service handling the cataloging and retrieval of remote Superset Integer IDs.
class IdMappingService:
# [DEF:__init__:Function]
# @PURPOSE: Initializes the mapping service.
def __init__(self, db_session: Session):
self.db = db_session
self.scheduler = BackgroundScheduler()
self._sync_job = None
# [/DEF:__init__:Function]
# [DEF:start_scheduler:Function]
# @PURPOSE: Starts the background scheduler with a given cron string.
# @PARAM: cron_string (str) - Cron expression for the sync interval.
# @PARAM: environments (List[str]) - List of environment IDs to sync.
# @PARAM: superset_client_factory - Function to get a client for an environment.
def start_scheduler(self, cron_string: str, environments: List[str], superset_client_factory):
with belief_scope("IdMappingService.start_scheduler"):
if self._sync_job:
self.scheduler.remove_job(self._sync_job.id)
logger.info("[IdMappingService.start_scheduler][Reflect] Removed existing sync job.")
def sync_all():
for env_id in environments:
client = superset_client_factory(env_id)
if client:
self.sync_environment(env_id, client)
self._sync_job = self.scheduler.add_job(
sync_all,
CronTrigger.from_crontab(cron_string),
id='id_mapping_sync_job',
replace_existing=True
)
if not self.scheduler.running:
self.scheduler.start()
logger.info(f"[IdMappingService.start_scheduler][Coherence:OK] Started background scheduler with cron: {cron_string}")
else:
logger.info(f"[IdMappingService.start_scheduler][Coherence:OK] Updated background scheduler with cron: {cron_string}")
# [/DEF:start_scheduler:Function]
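A hedged wiring sketch for application startup. The config keys other than migration_sync_cron, the "environments" shape, and the client-factory signature are assumptions:
from src.core.mapping_service import IdMappingService

def bootstrap_mapping_sync(db_session, config_manager, superset_client_factory):
    # Build the service and schedule the recurring sync from the stored cron,
    # falling back to the same daily default the settings endpoint uses.
    service = IdMappingService(db_session)
    cfg = config_manager.get_config()
    env_ids = list(cfg.get("environments", {}).keys())  # assumed config shape
    service.start_scheduler(
        cfg.get("migration_sync_cron", "0 2 * * *"),
        environments=env_ids,
        superset_client_factory=superset_client_factory,
    )
    return service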
# [DEF:sync_environment:Function]
# @PURPOSE: Fully synchronizes mapping for a specific environment.
# @PARAM: environment_id (str) - Target environment ID.
# @PARAM: superset_client - Instance capable of hitting the Superset API.
# @PRE: environment_id exists in the database.
# @POST: ResourceMapping records for the environment are created or updated.
def sync_environment(self, environment_id: str, superset_client) -> None:
"""
Polls the Superset APIs for the target environment and updates the local mapping table.
"""
with belief_scope("IdMappingService.sync_environment"):
logger.info(f"[IdMappingService.sync_environment][Action] Starting sync for environment {environment_id}")
# Implementation Note: In a real scenario, superset_client needs to be an instance
# capable of auth & iteration over /api/v1/chart/, /api/v1/dataset/, /api/v1/dashboard/
# Here we structure the logic according to the spec.
types_to_poll = [
(ResourceType.CHART, "chart", "slice_name"),
(ResourceType.DATASET, "dataset", "table_name"),
(ResourceType.DASHBOARD, "dashboard", "slug") # Note: dashboard slug or dashboard_title
]
total_synced = 0
try:
for res_enum, endpoint, name_field in types_to_poll:
logger.debug(f"[IdMappingService.sync_environment][Explore] Polling {endpoint} endpoint")
# Simulated API Fetch (Would be: superset_client.get(f"/api/v1/{endpoint}/")... )
# This relies on the superset API structure, e.g. { "result": [{"id": 1, "uuid": "...", name_field: "..."}] }
# We assume superset_client provides a generic method to fetch all pages.
try:
resources = superset_client.get_all_resources(endpoint)
for res in resources:
res_uuid = res.get("uuid")
res_id = str(res.get("id")) # Store as string
res_name = res.get(name_field)
if not res_uuid or not res_id:
continue
# Upsert Logic
mapping = self.db.query(ResourceMapping).filter_by(
environment_id=environment_id,
resource_type=res_enum,
uuid=res_uuid
).first()
if mapping:
mapping.remote_integer_id = res_id
mapping.resource_name = res_name
mapping.last_synced_at = datetime.now(timezone.utc)
else:
new_mapping = ResourceMapping(
environment_id=environment_id,
resource_type=res_enum,
uuid=res_uuid,
remote_integer_id=res_id,
resource_name=res_name,
last_synced_at=datetime.now(timezone.utc)
)
self.db.add(new_mapping)
total_synced += 1
except Exception as loop_e:
logger.error(f"[IdMappingService.sync_environment][Reason] Error polling {endpoint}: {loop_e}")
# Continue to next resource type instead of blowing up the whole sync
self.db.commit()
logger.info(f"[IdMappingService.sync_environment][Coherence:OK] Successfully synced {total_synced} items.")
except Exception as e:
self.db.rollback()
logger.error(f"[IdMappingService.sync_environment][Coherence:Failed] Critical sync failure: {e}")
raise
# [/DEF:sync_environment:Function]
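The sync assumes a get_all_resources method on the client. A minimal sketch of what such a method might look like; the pagination style is an assumption, and real Superset list endpoints expect a rison-encoded "q" parameter rather than the JSON stand-in used here:
import json

class SupersetClientSketch:
    # Hypothetical client: `self.session` is an authenticated requests.Session
    # and `self.base_url` points at the target Superset instance.
    def get_all_resources(self, endpoint: str, page_size: int = 100) -> list:
        results, page = [], 0
        while True:
            # Note: real Superset list endpoints take a rison-encoded "q";
            # JSON is used here purely as an illustrative stand-in.
            q = json.dumps({"page": page, "page_size": page_size})
            resp = self.session.get(f"{self.base_url}/api/v1/{endpoint}/", params={"q": q})
            resp.raise_for_status()
            batch = resp.json().get("result", [])
            results.extend(batch)
            if len(batch) < page_size:
                return results
            page += 1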
# [DEF:get_remote_id:Function]
# @PURPOSE: Retrieves the remote integer ID for a given universal UUID.
# @PARAM: environment_id (str)
# @PARAM: resource_type (ResourceType)
# @PARAM: uuid (str)
# @RETURN: Optional[int]
def get_remote_id(self, environment_id: str, resource_type: ResourceType, uuid: str) -> Optional[int]:
mapping = self.db.query(ResourceMapping).filter_by(
environment_id=environment_id,
resource_type=resource_type,
uuid=uuid
).first()
if mapping:
try:
return int(mapping.remote_integer_id)
except ValueError:
return None
return None
# [/DEF:get_remote_id:Function]
# [DEF:get_remote_ids_batch:Function]
# @PURPOSE: Retrieves remote integer IDs for a list of universal UUIDs efficiently.
# @PARAM: environment_id (str)
# @PARAM: resource_type (ResourceType)
# @PARAM: uuids (List[str])
# @RETURN: Dict[str, int] - Mapping of UUID -> Integer ID
def get_remote_ids_batch(self, environment_id: str, resource_type: ResourceType, uuids: List[str]) -> Dict[str, int]:
if not uuids:
return {}
mappings = self.db.query(ResourceMapping).filter(
ResourceMapping.environment_id == environment_id,
ResourceMapping.resource_type == resource_type,
ResourceMapping.uuid.in_(uuids)
).all()
result = {}
for m in mappings:
try:
result[m.uuid] = int(m.remote_integer_id)
except ValueError:
pass
return result
# [/DEF:get_remote_ids_batch:Function]
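Example lookup with the batch helper, using the values from the @TEST_DATA record in backend.src.models.mapping:
service = IdMappingService(db_session)  # db_session: an open SQLAlchemy Session
ids = service.get_remote_ids_batch(
    "prod-env-1",
    ResourceType.CHART,
    ["123e4567-e89b-12d3-a456-426614174000"],
)
# -> {"123e4567-e89b-12d3-a456-426614174000": 42} once the record has been synced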
# [/DEF:IdMappingService:Class]
# [/DEF:backend.src.core.mapping_service:Module]

View File: backend/src/core/migration_engine.py

@@ -11,28 +11,41 @@
import zipfile
import yaml
import os
import json
import re
import tempfile
from pathlib import Path
from typing import Dict, Optional, List
from .logger import logger, belief_scope
from src.core.mapping_service import IdMappingService
from src.models.mapping import ResourceType
# [/SECTION]
# [DEF:MigrationEngine:Class]
# @PURPOSE: Engine for transforming Superset export ZIPs.
class MigrationEngine:
# [DEF:__init__:Function]
# @PURPOSE: Initializes the migration engine with optional ID mapping service.
# @PARAM: mapping_service (Optional[IdMappingService]) - Used for resolving target environment integer IDs.
def __init__(self, mapping_service: Optional[IdMappingService] = None):
self.mapping_service = mapping_service
# [/DEF:__init__:Function]
# [DEF:transform_zip:Function]
# @PURPOSE: Extracts ZIP, replaces database UUIDs in YAMLs, patches cross-filters, and re-packages.
# @PARAM: zip_path (str) - Path to the source ZIP file.
# @PARAM: output_path (str) - Path where the transformed ZIP will be saved.
# @PARAM: db_mapping (Dict[str, str]) - Mapping of source UUID to target UUID.
# @PARAM: strip_databases (bool) - Whether to remove the databases directory from the archive.
# @PARAM: target_env_id (Optional[str]) - Used if fix_cross_filters is True to know which environment map to use.
# @PARAM: fix_cross_filters (bool) - Whether to patch dashboard json_metadata.
# @PRE: zip_path must point to a valid Superset export archive.
# @POST: Transformed archive is saved to output_path.
# @RETURN: bool - True if successful.
def transform_zip(self, zip_path: str, output_path: str, db_mapping: Dict[str, str], strip_databases: bool = True, target_env_id: Optional[str] = None, fix_cross_filters: bool = False) -> bool:
"""
Transform a Superset export ZIP by replacing database UUIDs and optionally fixing cross-filters.
"""
with belief_scope("MigrationEngine.transform_zip"):
with tempfile.TemporaryDirectory() as temp_dir_str:
@@ -44,8 +57,7 @@ class MigrationEngine:
with zipfile.ZipFile(zip_path, 'r') as zf:
zf.extractall(temp_dir)
# 2. Transform YAMLs (Databases)
dataset_files = list(temp_dir.glob("**/datasets/**/*.yaml")) + list(temp_dir.glob("**/datasets/*.yaml"))
dataset_files = list(set(dataset_files))
@@ -54,6 +66,20 @@ class MigrationEngine:
logger.info(f"[MigrationEngine.transform_zip][Action] Transforming dataset: {ds_file}")
self._transform_yaml(ds_file, db_mapping)
# 2.5 Patch Cross-Filters (Dashboards)
if fix_cross_filters and self.mapping_service and target_env_id:
dash_files = list(temp_dir.glob("**/dashboards/**/*.yaml")) + list(temp_dir.glob("**/dashboards/*.yaml"))
dash_files = list(set(dash_files))
logger.info(f"[MigrationEngine.transform_zip][State] Found {len(dash_files)} dashboard files for patching.")
# Gather all source UUID-to-ID mappings from the archive first
source_id_to_uuid_map = self._extract_chart_uuids_from_archive(temp_dir)
for dash_file in dash_files:
logger.info(f"[MigrationEngine.transform_zip][Action] Patching dashboard: {dash_file}")
self._patch_dashboard_metadata(dash_file, target_env_id, source_id_to_uuid_map)
# 3. Re-package
logger.info(f"[MigrationEngine.transform_zip][Action] Re-packaging ZIP to: {output_path} (strip_databases={strip_databases})")
with zipfile.ZipFile(output_path, 'w', zipfile.ZIP_DEFLATED) as zf:
@@ -97,6 +123,100 @@ class MigrationEngine:
yaml.dump(data, f)
# [/DEF:_transform_yaml:Function]
# [DEF:_extract_chart_uuids_from_archive:Function]
# @PURPOSE: Scans the unpacked ZIP to map local exported integer IDs back to their UUIDs.
# @PARAM: temp_dir (Path) - Root dir of unpacked archive
# @RETURN: Dict[int, str] - Mapping of source Integer ID to UUID.
def _extract_chart_uuids_from_archive(self, temp_dir: Path) -> Dict[int, str]:
# Implementation Note: This is a placeholder for extracting the actual source IDs.
# In a real scenario this means parsing chart YAMLs or the export manifest,
# wherever the source integer IDs are recorded.
# For simplicity in the US1 MVP, we read them from chart files when present.
mapping = {}
chart_files = list(temp_dir.glob("**/charts/**/*.yaml")) + list(temp_dir.glob("**/charts/*.yaml"))
for cf in set(chart_files):
try:
with open(cf, 'r') as f:
cdata = yaml.safe_load(f)
if cdata and 'id' in cdata and 'uuid' in cdata:
mapping[cdata['id']] = cdata['uuid']
except Exception:
pass
return mapping
# [/DEF:_extract_chart_uuids_from_archive:Function]
# [DEF:_patch_dashboard_metadata:Function]
# @PURPOSE: Replaces integer IDs in json_metadata.
# @PARAM: file_path (Path)
# @PARAM: target_env_id (str)
# @PARAM: source_map (Dict[int, str])
def _patch_dashboard_metadata(self, file_path: Path, target_env_id: str, source_map: Dict[int, str]):
with belief_scope("MigrationEngine._patch_dashboard_metadata"):
try:
with open(file_path, 'r') as f:
data = yaml.safe_load(f)
if not data or 'json_metadata' not in data:
return
metadata_str = data['json_metadata']
if not metadata_str:
return
json.loads(metadata_str) # Validate that the original metadata is well-formed JSON before patching
# The IDs live in nested JSON whose exact shape varies (e.g. native_filter_configuration).
# Rather than schema-aware dict traversal, the MVP patches the raw JSON string with targeted regexes.
# Fetch target UUIDs for everything we know:
uuids_needed = list(source_map.values())
target_ids = self.mapping_service.get_remote_ids_batch(target_env_id, ResourceType.CHART, uuids_needed)
if not target_ids:
logger.info("[MigrationEngine._patch_dashboard_metadata][Reflect] No remote target IDs found in mapping database.")
return
# Map Source Int -> Target Int
source_to_target = {}
missing_targets = []
for s_id, s_uuid in source_map.items():
if s_uuid in target_ids:
source_to_target[s_id] = target_ids[s_uuid]
else:
missing_targets.append(s_id)
if missing_targets:
logger.warning(f"[MigrationEngine._patch_dashboard_metadata][Coherence:Recoverable] Missing target IDs for source IDs: {missing_targets}. Cross-filters for these IDs might break.")
if not source_to_target:
logger.info("[MigrationEngine._patch_dashboard_metadata][Reflect] No source IDs matched remotely. Skipping patch.")
return
# Patch "chartId": <id> and "datasetId": <id> assignments (e.g. in
# native_filter_configuration targets) with a single-pass substitution, so an
# already-substituted target ID can never be re-matched by a later source ID
# (which could happen with a per-ID loop and a mapping like {1: 2, 2: 3}).
id_pattern = re.compile(r'("(?:chartId|datasetId)"\s*:\s*)(\d+)\b')
def _swap_id(match):
s_id = int(match.group(2))
t_id = source_to_target.get(s_id)
return f"{match.group(1)}{t_id}" if t_id is not None else match.group(0)
new_metadata_str = id_pattern.sub(_swap_id, metadata_str)
# Re-parse to confirm the patched string is still valid JSON before writing
data['json_metadata'] = json.dumps(json.loads(new_metadata_str))
with open(file_path, 'w') as f:
yaml.dump(data, f)
logger.info(f"[MigrationEngine._patch_dashboard_metadata][Reason] Re-serialized modified JSON metadata for dashboard.")
except Exception as e:
logger.error(f"[MigrationEngine._patch_dashboard_metadata][Coherence:Failed] Metadata patch failed: {e}")
# [/DEF:_patch_dashboard_metadata:Function]
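A worked example of the substitution above (values illustrative): only chartId/datasetId key-value pairs are rewritten, so an unrelated 42 elsewhere in the metadata is left alone.
import re

source_to_target = {42: 107}
meta = '{"native_filter_configuration": [{"targets": [{"datasetId": 42}]}], "row_limit": 42}'
id_pattern = re.compile(r'("(?:chartId|datasetId)"\s*:\s*)(\d+)\b')
patched = id_pattern.sub(
    lambda m: m.group(1) + str(source_to_target.get(int(m.group(2)), int(m.group(2)))),
    meta,
)
# patched == '{"native_filter_configuration": [{"targets": [{"datasetId": 107}]}], "row_limit": 42}'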
# [/DEF:MigrationEngine:Class]
# [/DEF:backend.src.core.migration_engine:Module]
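A hedged end-to-end usage sketch; the paths and UUID values are illustrative, and db_session stands for an open SQLAlchemy session on the mapping database:
from src.core.mapping_service import IdMappingService
from src.core.migration_engine import MigrationEngine

engine = MigrationEngine(mapping_service=IdMappingService(db_session))
ok = engine.transform_zip(
    zip_path="/tmp/dashboard_export.zip",
    output_path="/tmp/dashboard_export_prod.zip",
    db_mapping={"source-db-uuid": "target-db-uuid"},
    strip_databases=True,
    target_env_id="prod-env-1",
    fix_cross_filters=True,
)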

View File: backend/src/models/dashboard.py

@@ -26,6 +26,7 @@ class DashboardSelection(BaseModel):
source_env_id: str
target_env_id: str
replace_db_config: bool = False
fix_cross_filters: bool = True
# [/DEF:DashboardSelection:Class]
# [/DEF:backend.src.models.dashboard:Module]
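Illustrative request body for the migration endpoint; any DashboardSelection fields beyond those visible in this hunk (e.g. the selected dashboard list) are omitted here:
payload = {
    "source_env_id": "dev-env-1",
    "target_env_id": "prod-env-1",
    "replace_db_config": False,
    "fix_cross_filters": True,  # new flag introduced by this commit; defaults to True
}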

View File: backend/src/models/mapping.py

@@ -19,6 +19,16 @@ import enum
Base = declarative_base()
# [DEF:ResourceType:Class]
# @TIER: TRIVIAL
# @PURPOSE: Enumeration of possible Superset resource types for ID mapping.
class ResourceType(str, enum.Enum):
CHART = "chart"
DATASET = "dataset"
DASHBOARD = "dashboard"
# [/DEF:ResourceType:Class]
# [DEF:MigrationStatus:Class]
# @TIER: TRIVIAL
# @PURPOSE: Enumeration of possible migration job statuses.
@@ -70,6 +80,21 @@ class MigrationJob(Base):
status = Column(SQLEnum(MigrationStatus), default=MigrationStatus.PENDING)
replace_db = Column(Boolean, default=False)
created_at = Column(DateTime(timezone=True), server_default=func.now())
# [/DEF:MigrationJob:Class]
# [DEF:ResourceMapping:Class]
# @TIER: STANDARD
# @PURPOSE: Maps a universal UUID for a resource to its actual ID on a specific environment.
# @TEST_DATA: resource_mapping_record -> {'environment_id': 'prod-env-1', 'resource_type': 'chart', 'uuid': '123e4567-e89b-12d3-a456-426614174000', 'remote_integer_id': '42'}
class ResourceMapping(Base):
__tablename__ = "resource_mappings"
id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
environment_id = Column(String, ForeignKey("environments.id"), nullable=False)
resource_type = Column(SQLEnum(ResourceType), nullable=False)
uuid = Column(String, nullable=False)
remote_integer_id = Column(String, nullable=False) # Stored as string to handle potentially large or composite IDs safely, though Superset usually uses integers.
resource_name = Column(String, nullable=True) # Used for UI display
last_synced_at = Column(DateTime(timezone=True), server_default=func.now(), onupdate=func.now())
# [/DEF:ResourceMapping:Class]
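The upsert in IdMappingService.sync_environment keys on (environment_id, resource_type, uuid), so a composite unique constraint would let the database enforce that invariant. A sketch of the addition (an assumption, not part of this commit), placed inside the ResourceMapping class body:
from sqlalchemy import UniqueConstraint

# Inside class ResourceMapping(Base):
__table_args__ = (
    UniqueConstraint(
        "environment_id", "resource_type", "uuid",
        name="uq_resource_mappings_env_type_uuid",
    ),
)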
# [/DEF:backend.src.models.mapping:Module]