ready for test

2026-02-25 13:35:09 +03:00
parent 21e969a769
commit 33433c3173
11 changed files with 994 additions and 351 deletions

View File: backend/src/api/routes/migration.py

@@ -6,12 +6,16 @@
# @RELATION: DEPENDS_ON -> backend.src.dependencies
# @RELATION: DEPENDS_ON -> backend.src.models.dashboard
from fastapi import APIRouter, Depends, HTTPException, Query
from typing import List, Dict, Any
from sqlalchemy.orm import Session
from ...dependencies import get_config_manager, get_task_manager, has_permission
from ...core.database import get_db
from ...models.dashboard import DashboardMetadata, DashboardSelection
from ...core.superset_client import SupersetClient
from ...core.logger import belief_scope
from ...core.mapping_service import IdMappingService
from ...models.mapping import ResourceMapping
router = APIRouter(prefix="/api", tags=["migration"])
@@ -61,9 +65,10 @@ async def execute_migration(
# Create migration task with debug logging
from ...core.logger import logger
# Include replace_db_config and fix_cross_filters in the task parameters
task_params = selection.dict()
task_params['replace_db_config'] = selection.replace_db_config
task_params['fix_cross_filters'] = selection.fix_cross_filters
logger.info(f"Creating migration task with params: {task_params}")
logger.info(f"Available environments: {env_ids}")
@@ -78,4 +83,68 @@ async def execute_migration(
raise HTTPException(status_code=500, detail=f"Failed to create migration task: {str(e)}")
# [/DEF:execute_migration:Function]
# [DEF:get_migration_settings:Function]
# @PURPOSE: Get the current migration sync cron string.
@router.get("/settings", response_model=Dict[str, str])
async def get_migration_settings(
config_manager=Depends(get_config_manager),
_ = Depends(has_permission("plugin:migration", "READ"))
):
with belief_scope("get_migration_settings"):
# For MVP simplicity the cron expression is stored in the config;
# fall back to a valid default ("0 2 * * *", daily at 02:00) if not set.
config = config_manager.get_config()
cron = config.get("migration_sync_cron", "0 2 * * *")
return {"cron": cron}
# [/DEF:get_migration_settings:Function]
# [DEF:update_migration_settings:Function]
# @PURPOSE: Update the migration sync cron string.
@router.put("/settings", response_model=Dict[str, str])
async def update_migration_settings(
payload: Dict[str, str],
config_manager=Depends(get_config_manager),
_ = Depends(has_permission("plugin:migration", "WRITE"))
):
with belief_scope("update_migration_settings"):
if "cron" not in payload:
raise HTTPException(status_code=400, detail="Missing 'cron' field in payload")
cron_expr = payload["cron"]
# Cron validation could be added here (a hedged sketch follows this function).
# A full implementation would persist the setting and reschedule the sync job;
# for the MVP we simply patch the config via config_manager and save it.
current_cfg = config_manager.get_config()
current_cfg["migration_sync_cron"] = cron_expr
config_manager.save_config(current_cfg)
return {"cron": cron_expr, "status": "updated"}
# [/DEF:update_migration_settings:Function]
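A minimal validation step could reuse APScheduler's cron parser, which this commit already depends on: CronTrigger.from_crontab raises ValueError on malformed expressions. A hedged sketch, not part of this commit:
from apscheduler.triggers.cron import CronTrigger
from fastapi import HTTPException

def validate_cron_or_400(cron_expr: str) -> None:
    # Reuse APScheduler's parser for validation; it raises ValueError on
    # malformed five-field cron expressions such as "61 * * * *".
    try:
        CronTrigger.from_crontab(cron_expr)
    except ValueError as e:
        raise HTTPException(status_code=400, detail=f"Invalid cron expression: {e}")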
# [DEF:get_resource_mappings:Function]
# @PURPOSE: Fetch all synchronized object mappings from the database.
@router.get("/mappings-data", response_model=List[Dict[str, Any]])
async def get_resource_mappings(
skip: int = Query(0, ge=0),
limit: int = Query(100, ge=1, le=1000),
db: Session = Depends(get_db),
_ = Depends(has_permission("plugin:migration", "READ"))
):
with belief_scope("get_resource_mappings"):
mappings = db.query(ResourceMapping).offset(skip).limit(limit).all()
result = []
for m in mappings:
result.append({
"id": m.id,
"environment_id": m.environment_id,
"resource_type": m.resource_type.value,
"uuid": m.uuid,
"remote_id": m.remote_integer_id,
"resource_name": m.resource_name,
"last_synced_at": m.last_synced_at.isoformat() if m.last_synced_at else None
})
return result
# [/DEF:get_resource_mappings:Function]
# [/DEF:backend.src.api.routes.migration:Module]
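A hedged usage sketch for the new mappings endpoint. The base URL is a deployment-specific assumption, and the route requires plugin:migration READ permission, so a real call would also carry auth:
import httpx

# Page through /api/mappings-data; field names match the dicts built above.
resp = httpx.get(
    "http://localhost:8000/api/mappings-data",  # assumed host/port
    params={"skip": 0, "limit": 100},
)
resp.raise_for_status()
for row in resp.json():
    print(row["resource_type"], row["uuid"], "->", row["remote_id"])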

View File: backend/src/core/mapping_service.py

@@ -0,0 +1,195 @@
# [DEF:backend.src.core.mapping_service:Module]
#
# @TIER: CRITICAL
# @SEMANTICS: mapping, ids, synchronization, environments, cross-filters
# @PURPOSE: Service for tracking and synchronizing Superset Resource IDs (UUID <-> Integer ID)
# @LAYER: Core
# @RELATION: DEPENDS_ON -> backend.src.models.mapping (ResourceMapping, ResourceType)
# @RELATION: DEPENDS_ON -> backend.src.core.logger
# @TEST_DATA: mock_superset_resources -> {'chart': [{'id': 42, 'uuid': '1234', 'slice_name': 'test'}], 'dataset': [{'id': 99, 'uuid': '5678', 'table_name': 'data'}]}
#
# @INVARIANT: sync_environment must handle remote API failures gracefully.
# [SECTION: IMPORTS]
from typing import Dict, List, Optional
from datetime import datetime, timezone
from sqlalchemy.orm import Session
from apscheduler.schedulers.background import BackgroundScheduler
from apscheduler.triggers.cron import CronTrigger
from src.models.mapping import ResourceMapping, ResourceType
from src.core.logger import logger, belief_scope
# [/SECTION]
# [DEF:IdMappingService:Class]
# @TIER: CRITICAL
# @PURPOSE: Service handling the cataloging and retrieval of remote Superset Integer IDs.
class IdMappingService:
# [DEF:__init__:Function]
# @PURPOSE: Initializes the mapping service.
def __init__(self, db_session: Session):
self.db = db_session
self.scheduler = BackgroundScheduler()
self._sync_job = None
# [/DEF:__init__:Function]
# [DEF:start_scheduler:Function]
# @PURPOSE: Starts the background scheduler with a given cron string.
# @PARAM: cron_string (str) - Cron expression for the sync interval.
# @PARAM: environments (List[str]) - List of environment IDs to sync.
# @PARAM: superset_client_factory - Function to get a client for an environment.
def start_scheduler(self, cron_string: str, environments: List[str], superset_client_factory):
with belief_scope("IdMappingService.start_scheduler"):
if self._sync_job:
self.scheduler.remove_job(self._sync_job.id)
logger.info("[IdMappingService.start_scheduler][Reflect] Removed existing sync job.")
def sync_all():
for env_id in environments:
client = superset_client_factory(env_id)
if client:
self.sync_environment(env_id, client)
self._sync_job = self.scheduler.add_job(
sync_all,
CronTrigger.from_crontab(cron_string),
id='id_mapping_sync_job',
replace_existing=True
)
if not self.scheduler.running:
self.scheduler.start()
logger.info(f"[IdMappingService.start_scheduler][Coherence:OK] Started background scheduler with cron: {cron_string}")
else:
logger.info(f"[IdMappingService.start_scheduler][Coherence:OK] Updated background scheduler with cron: {cron_string}")
# [/DEF:start_scheduler:Function]
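A hedged wiring sketch for application startup. The config keys other than migration_sync_cron, the "environments" shape, and the client-factory signature are assumptions:
from src.core.mapping_service import IdMappingService

def bootstrap_mapping_sync(db_session, config_manager, superset_client_factory):
    # Build the service and schedule the recurring sync from the stored cron,
    # falling back to the same daily default the settings endpoint uses.
    service = IdMappingService(db_session)
    cfg = config_manager.get_config()
    env_ids = list(cfg.get("environments", {}).keys())  # assumed config shape
    service.start_scheduler(
        cfg.get("migration_sync_cron", "0 2 * * *"),
        environments=env_ids,
        superset_client_factory=superset_client_factory,
    )
    return service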
# [DEF:sync_environment:Function]
# @PURPOSE: Fully synchronizes mapping for a specific environment.
# @PARAM: environment_id (str) - Target environment ID.
# @PARAM: superset_client - Instance capable of hitting the Superset API.
# @PRE: environment_id exists in the database.
# @POST: ResourceMapping records for the environment are created or updated.
def sync_environment(self, environment_id: str, superset_client) -> None:
"""
Polls the Superset APIs for the target environment and updates the local mapping table.
"""
with belief_scope("IdMappingService.sync_environment"):
logger.info(f"[IdMappingService.sync_environment][Action] Starting sync for environment {environment_id}")
# Implementation Note: In a real scenario, superset_client needs to be an instance
# capable of auth & iteration over /api/v1/chart/, /api/v1/dataset/, /api/v1/dashboard/
# Here we structure the logic according to the spec.
types_to_poll = [
(ResourceType.CHART, "chart", "slice_name"),
(ResourceType.DATASET, "dataset", "table_name"),
(ResourceType.DASHBOARD, "dashboard", "slug") # Note: dashboard slug or dashboard_title
]
total_synced = 0
try:
for res_enum, endpoint, name_field in types_to_poll:
logger.debug(f"[IdMappingService.sync_environment][Explore] Polling {endpoint} endpoint")
# Simulated API Fetch (Would be: superset_client.get(f"/api/v1/{endpoint}/")... )
# This relies on the superset API structure, e.g. { "result": [{"id": 1, "uuid": "...", name_field: "..."}] }
# We assume superset_client provides a generic method to fetch all pages.
try:
resources = superset_client.get_all_resources(endpoint)
for res in resources:
res_uuid = res.get("uuid")
res_id = str(res.get("id")) # Store as string
res_name = res.get(name_field)
if not res_uuid or not res_id:
continue
# Upsert Logic
mapping = self.db.query(ResourceMapping).filter_by(
environment_id=environment_id,
resource_type=res_enum,
uuid=res_uuid
).first()
if mapping:
mapping.remote_integer_id = res_id
mapping.resource_name = res_name
mapping.last_synced_at = datetime.now(timezone.utc)
else:
new_mapping = ResourceMapping(
environment_id=environment_id,
resource_type=res_enum,
uuid=res_uuid,
remote_integer_id=res_id,
resource_name=res_name,
last_synced_at=datetime.now(timezone.utc)
)
self.db.add(new_mapping)
total_synced += 1
except Exception as loop_e:
logger.error(f"[IdMappingService.sync_environment][Reason] Error polling {endpoint}: {loop_e}")
# Continue to next resource type instead of blowing up the whole sync
self.db.commit()
logger.info(f"[IdMappingService.sync_environment][Coherence:OK] Successfully synced {total_synced} items.")
except Exception as e:
self.db.rollback()
logger.error(f"[IdMappingService.sync_environment][Coherence:Failed] Critical sync failure: {e}")
raise
# [/DEF:sync_environment:Function]
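The sync assumes a get_all_resources method on the client. A minimal sketch of what such a method might look like; the pagination style is an assumption, and real Superset list endpoints expect a rison-encoded "q" parameter rather than the JSON stand-in used here:
import json

class SupersetClientSketch:
    # Hypothetical client: `self.session` is an authenticated requests.Session
    # and `self.base_url` points at the target Superset instance.
    def get_all_resources(self, endpoint: str, page_size: int = 100) -> list:
        results, page = [], 0
        while True:
            # Note: real Superset list endpoints take a rison-encoded "q";
            # JSON is used here purely as an illustrative stand-in.
            q = json.dumps({"page": page, "page_size": page_size})
            resp = self.session.get(f"{self.base_url}/api/v1/{endpoint}/", params={"q": q})
            resp.raise_for_status()
            batch = resp.json().get("result", [])
            results.extend(batch)
            if len(batch) < page_size:
                return results
            page += 1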
# [DEF:get_remote_id:Function]
# @PURPOSE: Retrieves the remote integer ID for a given universal UUID.
# @PARAM: environment_id (str)
# @PARAM: resource_type (ResourceType)
# @PARAM: uuid (str)
# @RETURN: Optional[int]
def get_remote_id(self, environment_id: str, resource_type: ResourceType, uuid: str) -> Optional[int]:
mapping = self.db.query(ResourceMapping).filter_by(
environment_id=environment_id,
resource_type=resource_type,
uuid=uuid
).first()
if mapping:
try:
return int(mapping.remote_integer_id)
except ValueError:
return None
return None
# [/DEF:get_remote_id:Function]
# [DEF:get_remote_ids_batch:Function]
# @PURPOSE: Retrieves remote integer IDs for a list of universal UUIDs efficiently.
# @PARAM: environment_id (str)
# @PARAM: resource_type (ResourceType)
# @PARAM: uuids (List[str])
# @RETURN: Dict[str, int] - Mapping of UUID -> Integer ID
def get_remote_ids_batch(self, environment_id: str, resource_type: ResourceType, uuids: List[str]) -> Dict[str, int]:
if not uuids:
return {}
mappings = self.db.query(ResourceMapping).filter(
ResourceMapping.environment_id == environment_id,
ResourceMapping.resource_type == resource_type,
ResourceMapping.uuid.in_(uuids)
).all()
result = {}
for m in mappings:
try:
result[m.uuid] = int(m.remote_integer_id)
except ValueError:
pass
return result
# [/DEF:get_remote_ids_batch:Function]
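Example lookup with the batch helper, using the values from the @TEST_DATA record in backend.src.models.mapping:
service = IdMappingService(db_session)  # db_session: an open SQLAlchemy Session
ids = service.get_remote_ids_batch(
    "prod-env-1",
    ResourceType.CHART,
    ["123e4567-e89b-12d3-a456-426614174000"],
)
# -> {"123e4567-e89b-12d3-a456-426614174000": 42} once the record has been synced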
# [/DEF:IdMappingService:Class]
# [/DEF:backend.src.core.mapping_service:Module]

View File: backend/src/core/migration_engine.py

@@ -11,28 +11,41 @@
import zipfile
import yaml
import os
import json
import re
import tempfile
from pathlib import Path
from typing import Dict, Optional, List
from .logger import logger, belief_scope
from src.core.mapping_service import IdMappingService
from src.models.mapping import ResourceType
# [/SECTION]
# [DEF:MigrationEngine:Class]
# @PURPOSE: Engine for transforming Superset export ZIPs.
class MigrationEngine:
# [DEF:__init__:Function]
# @PURPOSE: Initializes the migration engine with optional ID mapping service.
# @PARAM: mapping_service (Optional[IdMappingService]) - Used for resolving target environment integer IDs.
def __init__(self, mapping_service: Optional[IdMappingService] = None):
self.mapping_service = mapping_service
# [/DEF:__init__:Function]
# [DEF:transform_zip:Function]
# @PURPOSE: Extracts ZIP, replaces database UUIDs in YAMLs, patches cross-filters, and re-packages.
# @PARAM: zip_path (str) - Path to the source ZIP file.
# @PARAM: output_path (str) - Path where the transformed ZIP will be saved.
# @PARAM: db_mapping (Dict[str, str]) - Mapping of source UUID to target UUID.
# @PARAM: strip_databases (bool) - Whether to remove the databases directory from the archive.
# @PARAM: target_env_id (Optional[str]) - Used if fix_cross_filters is True to know which environment map to use.
# @PARAM: fix_cross_filters (bool) - Whether to patch dashboard json_metadata.
# @PRE: zip_path must point to a valid Superset export archive.
# @POST: Transformed archive is saved to output_path.
# @RETURN: bool - True if successful.
def transform_zip(self, zip_path: str, output_path: str, db_mapping: Dict[str, str], strip_databases: bool = True, target_env_id: Optional[str] = None, fix_cross_filters: bool = False) -> bool:
"""
Transform a Superset export ZIP by replacing database UUIDs and optionally fixing cross-filters.
"""
with belief_scope("MigrationEngine.transform_zip"):
with tempfile.TemporaryDirectory() as temp_dir_str:
@@ -44,8 +57,7 @@ class MigrationEngine:
with zipfile.ZipFile(zip_path, 'r') as zf:
zf.extractall(temp_dir)
# 2. Transform YAMLs (Databases)
dataset_files = list(temp_dir.glob("**/datasets/**/*.yaml")) + list(temp_dir.glob("**/datasets/*.yaml"))
dataset_files = list(set(dataset_files))
@@ -54,6 +66,20 @@ class MigrationEngine:
logger.info(f"[MigrationEngine.transform_zip][Action] Transforming dataset: {ds_file}")
self._transform_yaml(ds_file, db_mapping)
# 2.5 Patch Cross-Filters (Dashboards)
if fix_cross_filters and self.mapping_service and target_env_id:
dash_files = list(temp_dir.glob("**/dashboards/**/*.yaml")) + list(temp_dir.glob("**/dashboards/*.yaml"))
dash_files = list(set(dash_files))
logger.info(f"[MigrationEngine.transform_zip][State] Found {len(dash_files)} dashboard files for patching.")
# Gather all source UUID-to-ID mappings from the archive first
source_id_to_uuid_map = self._extract_chart_uuids_from_archive(temp_dir)
for dash_file in dash_files:
logger.info(f"[MigrationEngine.transform_zip][Action] Patching dashboard: {dash_file}")
self._patch_dashboard_metadata(dash_file, target_env_id, source_id_to_uuid_map)
# 3. Re-package
logger.info(f"[MigrationEngine.transform_zip][Action] Re-packaging ZIP to: {output_path} (strip_databases={strip_databases})")
with zipfile.ZipFile(output_path, 'w', zipfile.ZIP_DEFLATED) as zf:
@@ -97,6 +123,100 @@ class MigrationEngine:
yaml.dump(data, f)
# [/DEF:_transform_yaml:Function]
# [DEF:_extract_chart_uuids_from_archive:Function]
# @PURPOSE: Scans the unpacked ZIP to map local exported integer IDs back to their UUIDs.
# @PARAM: temp_dir (Path) - Root dir of unpacked archive
# @RETURN: Dict[int, str] - Mapping of source Integer ID to UUID.
def _extract_chart_uuids_from_archive(self, temp_dir: Path) -> Dict[int, str]:
# Implementation Note: This is a placeholder for extracting the actual source IDs.
# In a real scenario this means parsing chart YAMLs or the export manifest,
# wherever the source integer IDs are recorded.
# For simplicity in the US1 MVP, we read them from chart files when present.
mapping = {}
chart_files = list(temp_dir.glob("**/charts/**/*.yaml")) + list(temp_dir.glob("**/charts/*.yaml"))
for cf in set(chart_files):
try:
with open(cf, 'r') as f:
cdata = yaml.safe_load(f)
if cdata and 'id' in cdata and 'uuid' in cdata:
mapping[cdata['id']] = cdata['uuid']
except Exception:
pass
return mapping
# [/DEF:_extract_chart_uuids_from_archive:Function]
# [DEF:_patch_dashboard_metadata:Function]
# @PURPOSE: Replaces integer IDs in json_metadata.
# @PARAM: file_path (Path)
# @PARAM: target_env_id (str)
# @PARAM: source_map (Dict[int, str])
def _patch_dashboard_metadata(self, file_path: Path, target_env_id: str, source_map: Dict[int, str]):
with belief_scope("MigrationEngine._patch_dashboard_metadata"):
try:
with open(file_path, 'r') as f:
data = yaml.safe_load(f)
if not data or 'json_metadata' not in data:
return
metadata_str = data['json_metadata']
if not metadata_str:
return
json.loads(metadata_str) # Validate that the original metadata is well-formed JSON before patching
# The IDs live in nested JSON whose exact shape varies (e.g. native_filter_configuration).
# Rather than schema-aware dict traversal, the MVP patches the raw JSON string with targeted regexes.
# Fetch target UUIDs for everything we know:
uuids_needed = list(source_map.values())
target_ids = self.mapping_service.get_remote_ids_batch(target_env_id, ResourceType.CHART, uuids_needed)
if not target_ids:
logger.info("[MigrationEngine._patch_dashboard_metadata][Reflect] No remote target IDs found in mapping database.")
return
# Map Source Int -> Target Int
source_to_target = {}
missing_targets = []
for s_id, s_uuid in source_map.items():
if s_uuid in target_ids:
source_to_target[s_id] = target_ids[s_uuid]
else:
missing_targets.append(s_id)
if missing_targets:
logger.warning(f"[MigrationEngine._patch_dashboard_metadata][Coherence:Recoverable] Missing target IDs for source IDs: {missing_targets}. Cross-filters for these IDs might break.")
if not source_to_target:
logger.info("[MigrationEngine._patch_dashboard_metadata][Reflect] No source IDs matched remotely. Skipping patch.")
return
# Patch "chartId": <id> and "datasetId": <id> assignments (e.g. in
# native_filter_configuration targets) with a single-pass substitution, so an
# already-substituted target ID can never be re-matched by a later source ID
# (which could happen with a per-ID loop and a mapping like {1: 2, 2: 3}).
id_pattern = re.compile(r'("(?:chartId|datasetId)"\s*:\s*)(\d+)\b')
def _swap_id(match):
s_id = int(match.group(2))
t_id = source_to_target.get(s_id)
return f"{match.group(1)}{t_id}" if t_id is not None else match.group(0)
new_metadata_str = id_pattern.sub(_swap_id, metadata_str)
# Re-parse to confirm the patched string is still valid JSON before writing
data['json_metadata'] = json.dumps(json.loads(new_metadata_str))
with open(file_path, 'w') as f:
yaml.dump(data, f)
logger.info(f"[MigrationEngine._patch_dashboard_metadata][Reason] Re-serialized modified JSON metadata for dashboard.")
except Exception as e:
logger.error(f"[MigrationEngine._patch_dashboard_metadata][Coherence:Failed] Metadata patch failed: {e}")
# [/DEF:_patch_dashboard_metadata:Function]
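A worked example of the substitution above (values illustrative): only chartId/datasetId key-value pairs are rewritten, so an unrelated 42 elsewhere in the metadata is left alone.
import re

source_to_target = {42: 107}
meta = '{"native_filter_configuration": [{"targets": [{"datasetId": 42}]}], "row_limit": 42}'
id_pattern = re.compile(r'("(?:chartId|datasetId)"\s*:\s*)(\d+)\b')
patched = id_pattern.sub(
    lambda m: m.group(1) + str(source_to_target.get(int(m.group(2)), int(m.group(2)))),
    meta,
)
# patched == '{"native_filter_configuration": [{"targets": [{"datasetId": 107}]}], "row_limit": 42}'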
# [/DEF:MigrationEngine:Class]
# [/DEF:backend.src.core.migration_engine:Module]
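A hedged end-to-end usage sketch; the paths and UUID values are illustrative, and db_session stands for an open SQLAlchemy session on the mapping database:
from src.core.mapping_service import IdMappingService
from src.core.migration_engine import MigrationEngine

engine = MigrationEngine(mapping_service=IdMappingService(db_session))
ok = engine.transform_zip(
    zip_path="/tmp/dashboard_export.zip",
    output_path="/tmp/dashboard_export_prod.zip",
    db_mapping={"source-db-uuid": "target-db-uuid"},
    strip_databases=True,
    target_env_id="prod-env-1",
    fix_cross_filters=True,
)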

View File: backend/src/models/dashboard.py

@@ -26,6 +26,7 @@ class DashboardSelection(BaseModel):
source_env_id: str
target_env_id: str
replace_db_config: bool = False
fix_cross_filters: bool = True
# [/DEF:DashboardSelection:Class]
# [/DEF:backend.src.models.dashboard:Module]
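Illustrative request body for the migration endpoint; any DashboardSelection fields beyond those visible in this hunk (e.g. the selected dashboard list) are omitted here:
payload = {
    "source_env_id": "dev-env-1",
    "target_env_id": "prod-env-1",
    "replace_db_config": False,
    "fix_cross_filters": True,  # new flag introduced by this commit; defaults to True
}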

View File: backend/src/models/mapping.py

@@ -19,6 +19,16 @@ import enum
Base = declarative_base()
# [DEF:ResourceType:Class]
# @TIER: TRIVIAL
# @PURPOSE: Enumeration of possible Superset resource types for ID mapping.
class ResourceType(str, enum.Enum):
CHART = "chart"
DATASET = "dataset"
DASHBOARD = "dashboard"
# [/DEF:ResourceType:Class]
# [DEF:MigrationStatus:Class]
# @TIER: TRIVIAL
# @PURPOSE: Enumeration of possible migration job statuses.
@@ -70,6 +80,21 @@ class MigrationJob(Base):
status = Column(SQLEnum(MigrationStatus), default=MigrationStatus.PENDING)
replace_db = Column(Boolean, default=False)
created_at = Column(DateTime(timezone=True), server_default=func.now())
# [/DEF:MigrationJob:Class]
# [DEF:ResourceMapping:Class]
# @TIER: STANDARD
# @PURPOSE: Maps a universal UUID for a resource to its actual ID on a specific environment.
# @TEST_DATA: resource_mapping_record -> {'environment_id': 'prod-env-1', 'resource_type': 'chart', 'uuid': '123e4567-e89b-12d3-a456-426614174000', 'remote_integer_id': '42'}
class ResourceMapping(Base):
__tablename__ = "resource_mappings"
id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
environment_id = Column(String, ForeignKey("environments.id"), nullable=False)
resource_type = Column(SQLEnum(ResourceType), nullable=False)
uuid = Column(String, nullable=False)
remote_integer_id = Column(String, nullable=False) # Stored as string to handle potentially large or composite IDs safely, though Superset usually uses integers.
resource_name = Column(String, nullable=True) # Used for UI display
last_synced_at = Column(DateTime(timezone=True), server_default=func.now(), onupdate=func.now())
# [/DEF:ResourceMapping:Class]
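The upsert in IdMappingService.sync_environment keys on (environment_id, resource_type, uuid), so a composite unique constraint would let the database enforce that invariant. A sketch of the addition (an assumption, not part of this commit), placed inside the ResourceMapping class body:
from sqlalchemy import UniqueConstraint

# Inside class ResourceMapping(Base):
__table_args__ = (
    UniqueConstraint(
        "environment_id", "resource_type", "uuid",
        name="uq_resource_mappings_env_type_uuid",
    ),
)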
# [/DEF:backend.src.models.mapping:Module]