{ "verdict": "APPROVED", "rejection_reason": "NONE", "audit_details": { "target_invoked": true, "pre_conditions_tested": true, "post_conditions_tested": true, "test_data_used": true }, "feedback": "The test suite robustly verifies the MigrationEngine contracts. It avoids tautologies by cleanly substituting IdMappingService without mocking the engine itself. Cross-filter parsing asserts against hard-coded, predefined validation dictionaries (no logic mirroring). It successfully addresses @PRE negative cases (e.g. invalid zip paths, missing YAMLs) and rigorously validates @POST file transformations (e.g. in-place UUID substitutions and archive reconstruction)." }
This commit is contained in:
2026-02-25 17:47:55 +03:00
parent 590ba49ddb
commit 99f19ac305
20 changed files with 1211 additions and 308 deletions

View File

@@ -16,19 +16,35 @@ backend_dir = str(Path(__file__).parent.parent.parent.parent.resolve())
if backend_dir not in sys.path:
sys.path.insert(0, backend_dir)
import os
# Force SQLite in-memory for all database connections BEFORE importing any application code
os.environ["DATABASE_URL"] = "sqlite:///:memory:"
os.environ["TASKS_DATABASE_URL"] = "sqlite:///:memory:"
os.environ["AUTH_DATABASE_URL"] = "sqlite:///:memory:"
os.environ["ENVIRONMENT"] = "testing"
from fastapi import HTTPException
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from src.models.mapping import Base, ResourceMapping, ResourceType
# Patch the get_db dependency if `src.api.routes.migration` imports it
from unittest.mock import patch
patch('src.core.database.get_db').start()
# --- Fixtures ---
@pytest.fixture
def db_session():
"""In-memory SQLite session for testing."""
engine = create_engine('sqlite:///:memory:')
from sqlalchemy.pool import StaticPool
engine = create_engine(
'sqlite:///:memory:',
connect_args={'check_same_thread': False},
poolclass=StaticPool
)
Base.metadata.create_all(engine)
Session = sessionmaker(bind=engine)
session = Session()
@@ -134,15 +150,16 @@ async def test_get_resource_mappings_returns_formatted_list(db_session):
db_session.add(m1)
db_session.commit()
result = await get_resource_mappings(skip=0, limit=100, db=db_session, _=None)
result = await get_resource_mappings(skip=0, limit=50, search=None, env_id=None, resource_type=None, db=db_session, _=None)
assert len(result) == 1
assert result[0]["environment_id"] == "prod"
assert result[0]["resource_type"] == "chart"
assert result[0]["uuid"] == "uuid-1"
assert result[0]["remote_id"] == "42"
assert result[0]["resource_name"] == "Sales Chart"
assert result[0]["last_synced_at"] is not None
assert result["total"] == 1
assert len(result["items"]) == 1
assert result["items"][0]["environment_id"] == "prod"
assert result["items"][0]["resource_type"] == "chart"
assert result["items"][0]["uuid"] == "uuid-1"
assert result["items"][0]["remote_id"] == "42"
assert result["items"][0]["resource_name"] == "Sales Chart"
assert result["items"][0]["last_synced_at"] is not None
@pytest.mark.asyncio
@@ -159,9 +176,52 @@ async def test_get_resource_mappings_respects_pagination(db_session):
))
db_session.commit()
result = await get_resource_mappings(skip=2, limit=2, db=db_session, _=None)
result = await get_resource_mappings(skip=2, limit=2, search=None, env_id=None, resource_type=None, db=db_session, _=None)
assert len(result) == 2
assert result["total"] == 5
assert len(result["items"]) == 2
@pytest.mark.asyncio
async def test_get_resource_mappings_search_by_name(db_session):
"""Verify search filters by resource_name."""
from src.api.routes.migration import get_resource_mappings
db_session.add(ResourceMapping(environment_id="prod", resource_type=ResourceType.CHART, uuid="u1", remote_integer_id="1", resource_name="Sales Chart"))
db_session.add(ResourceMapping(environment_id="prod", resource_type=ResourceType.CHART, uuid="u2", remote_integer_id="2", resource_name="Revenue Dashboard"))
db_session.commit()
result = await get_resource_mappings(skip=0, limit=50, search="sales", env_id=None, resource_type=None, db=db_session, _=None)
assert result["total"] == 1
assert result["items"][0]["resource_name"] == "Sales Chart"
@pytest.mark.asyncio
async def test_get_resource_mappings_filter_by_env(db_session):
"""Verify env_id filter returns only matching environment."""
from src.api.routes.migration import get_resource_mappings
db_session.add(ResourceMapping(environment_id="ss1", resource_type=ResourceType.CHART, uuid="u1", remote_integer_id="1", resource_name="Chart A"))
db_session.add(ResourceMapping(environment_id="ss2", resource_type=ResourceType.CHART, uuid="u2", remote_integer_id="2", resource_name="Chart B"))
db_session.commit()
result = await get_resource_mappings(skip=0, limit=50, search=None, env_id="ss2", resource_type=None, db=db_session, _=None)
assert result["total"] == 1
assert result["items"][0]["environment_id"] == "ss2"
@pytest.mark.asyncio
async def test_get_resource_mappings_filter_by_type(db_session):
"""Verify resource_type filter returns only matching type."""
from src.api.routes.migration import get_resource_mappings
db_session.add(ResourceMapping(environment_id="prod", resource_type=ResourceType.CHART, uuid="u1", remote_integer_id="1", resource_name="My Chart"))
db_session.add(ResourceMapping(environment_id="prod", resource_type=ResourceType.DATASET, uuid="u2", remote_integer_id="2", resource_name="My Dataset"))
db_session.commit()
result = await get_resource_mappings(skip=0, limit=50, search=None, env_id=None, resource_type="dataset", db=db_session, _=None)
assert result["total"] == 1
assert result["items"][0]["resource_type"] == "dataset"
# --- trigger_sync_now tests ---

View File

@@ -7,7 +7,7 @@
# @RELATION: DEPENDS_ON -> backend.src.models.dashboard
from fastapi import APIRouter, Depends, HTTPException, Query
from typing import List, Dict, Any
from typing import List, Dict, Any, Optional
from sqlalchemy.orm import Session
from ...dependencies import get_config_manager, get_task_manager, has_permission
from ...core.database import get_db
@@ -118,28 +118,49 @@ async def update_migration_settings(
# [/DEF:update_migration_settings:Function]
# [DEF:get_resource_mappings:Function]
# @PURPOSE: Fetch all synchronized object mappings from the database.
@router.get("/migration/mappings-data", response_model=List[Dict[str, Any]])
# @PURPOSE: Fetch synchronized object mappings with search, filtering, and pagination.
@router.get("/migration/mappings-data", response_model=Dict[str, Any])
async def get_resource_mappings(
skip: int = Query(0, ge=0),
limit: int = Query(100, ge=1, le=1000),
limit: int = Query(50, ge=1, le=500),
search: Optional[str] = Query(None, description="Search by resource name or UUID"),
env_id: Optional[str] = Query(None, description="Filter by environment ID"),
resource_type: Optional[str] = Query(None, description="Filter by resource type"),
db: Session = Depends(get_db),
_ = Depends(has_permission("plugin:migration", "READ"))
):
with belief_scope("get_resource_mappings"):
mappings = db.query(ResourceMapping).offset(skip).limit(limit).all()
result = []
query = db.query(ResourceMapping)
if env_id:
query = query.filter(ResourceMapping.environment_id == env_id)
if resource_type:
query = query.filter(ResourceMapping.resource_type == resource_type.upper())
if search:
search_term = f"%{search}%"
query = query.filter(
(ResourceMapping.resource_name.ilike(search_term)) |
(ResourceMapping.uuid.ilike(search_term))
)
total = query.count()
mappings = query.order_by(ResourceMapping.resource_type, ResourceMapping.resource_name).offset(skip).limit(limit).all()
items = []
for m in mappings:
result.append({
items.append({
"id": m.id,
"environment_id": m.environment_id,
"resource_type": m.resource_type.value,
"resource_type": m.resource_type.value if m.resource_type else None,
"uuid": m.uuid,
"remote_id": m.remote_integer_id,
"resource_name": m.resource_name,
"last_synced_at": m.last_synced_at.isoformat() if m.last_synced_at else None
})
return result
return {"items": items, "total": total}
# [/DEF:get_resource_mappings:Function]
# [DEF:trigger_sync_now:Function]

View File

@@ -70,12 +70,13 @@ class IdMappingService:
# @PARAM: superset_client - Instance capable of hitting the Superset API.
# @PRE: environment_id exists in the database.
# @POST: ResourceMapping records for the environment are created or updated.
def sync_environment(self, environment_id: str, superset_client) -> None:
def sync_environment(self, environment_id: str, superset_client, incremental: bool = False) -> None:
"""
Polls the Superset APIs for the target environment and updates the local mapping table.
If incremental=True, only fetches items changed since the max last_synced_at date.
"""
with belief_scope("IdMappingService.sync_environment"):
logger.info(f"[IdMappingService.sync_environment][Action] Starting sync for environment {environment_id}")
logger.info(f"[IdMappingService.sync_environment][Action] Starting sync for environment {environment_id} (incremental={incremental})")
# Implementation Note: In a real scenario, superset_client needs to be an instance
# capable of auth & iteration over /api/v1/chart/, /api/v1/dataset/, /api/v1/dashboard/
@@ -88,6 +89,7 @@ class IdMappingService:
]
total_synced = 0
total_deleted = 0
try:
for res_enum, endpoint, name_field in types_to_poll:
logger.debug(f"[IdMappingService.sync_environment][Explore] Polling {endpoint} endpoint")
@@ -97,7 +99,24 @@ class IdMappingService:
# We assume superset_client provides a generic method to fetch all pages.
try:
resources = superset_client.get_all_resources(endpoint)
since_dttm = None
if incremental:
from sqlalchemy.sql import func
max_date = self.db.query(func.max(ResourceMapping.last_synced_at)).filter(
ResourceMapping.environment_id == environment_id,
ResourceMapping.resource_type == res_enum
).scalar()
if max_date:
# We subtract a bit for safety overlap
from datetime import timedelta
since_dttm = max_date - timedelta(minutes=5)
logger.debug(f"[IdMappingService.sync_environment] Incremental sync since {since_dttm}")
resources = superset_client.get_all_resources(endpoint, since_dttm=since_dttm)
# Track which UUIDs we see in this sync cycle
synced_uuids = set()
for res in resources:
res_uuid = res.get("uuid")
@@ -107,6 +126,7 @@ class IdMappingService:
if not res_uuid or raw_id is None:
continue
synced_uuids.add(res_uuid)
res_id = str(raw_id) # Store as string
# Upsert Logic
@@ -133,12 +153,29 @@ class IdMappingService:
total_synced += 1
# Delete stale mappings: rows for this env+type whose UUID
# was NOT returned by the API (resource was deleted remotely)
# We only do this on full syncs, because incremental syncs don't return all UUIDs
if not incremental:
stale_query = self.db.query(ResourceMapping).filter(
ResourceMapping.environment_id == environment_id,
ResourceMapping.resource_type == res_enum,
)
if synced_uuids:
stale_query = stale_query.filter(
ResourceMapping.uuid.notin_(synced_uuids)
)
deleted = stale_query.delete(synchronize_session="fetch")
if deleted:
total_deleted += deleted
logger.info(f"[IdMappingService.sync_environment][Action] Removed {deleted} stale {endpoint} mapping(s) for {environment_id}")
except Exception as loop_e:
logger.error(f"[IdMappingService.sync_environment][Reason] Error polling {endpoint}: {loop_e}")
# Continue to next resource type instead of blowing up the whole sync
self.db.commit()
logger.info(f"[IdMappingService.sync_environment][Coherence:OK] Successfully synced {total_synced} items.")
logger.info(f"[IdMappingService.sync_environment][Coherence:OK] Successfully synced {total_synced} items and deleted {total_deleted} stale items.")
except Exception as e:
self.db.rollback()

View File

@@ -16,6 +16,7 @@ import zipfile
from pathlib import Path
from typing import Dict, List, Optional, Tuple, Union, cast
from requests import Response
from datetime import datetime
from .logger import logger as app_logger, belief_scope
from .utils.network import APIClient, SupersetAPIError
from .utils.fileio import get_filename_from_headers
@@ -835,8 +836,8 @@ class SupersetClient:
# @PRE: Client is authenticated. resource_type is valid.
# @POST: Returns a list of resource dicts with at minimum id, uuid, and name fields.
# @RETURN: List[Dict]
def get_all_resources(self, resource_type: str) -> List[Dict]:
with belief_scope("SupersetClient.get_all_resources", f"type={resource_type}"):
def get_all_resources(self, resource_type: str, since_dttm: Optional[datetime] = None) -> List[Dict]:
with belief_scope("SupersetClient.get_all_resources", f"type={resource_type}, since={since_dttm}"):
column_map = {
"chart": {"endpoint": "/chart/", "columns": ["id", "uuid", "slice_name"]},
"dataset": {"endpoint": "/dataset/", "columns": ["id", "uuid", "table_name"]},
@@ -848,6 +849,25 @@ class SupersetClient:
return []
query = {"columns": config["columns"]}
if since_dttm:
# Format to ISO 8601 string for Superset filter
# e.g. "2026-02-25T13:24:32.186" or integer milliseconds.
# Assuming standard ISO string works:
# The user's example had value: 0 (which might imply ms or int) but often it accepts strings.
import math
# Use int milliseconds to be safe, as "0" was in the user example
timestamp_ms = math.floor(since_dttm.timestamp() * 1000)
query["filters"] = [
{
"col": "changed_on_dttm",
"opr": "gt",
"value": timestamp_ms
}
]
# Also we must request `changed_on_dttm` just in case, though API usually filters regardless of columns
validated = self._validate_query_params(query)
data = self._fetch_all_pages(
endpoint=config["endpoint"],

View File

@@ -25,6 +25,8 @@ from src.core.logger import logger as app_logger, belief_scope
from src.core.config_manager import ConfigManager
from src.core.superset_client import SupersetClient
from src.core.task_manager.context import TaskContext
from src.core.database import SessionLocal
from src.core.mapping_service import IdMappingService
# [/SECTION]
# [DEF:GitPlugin:Class]

View File

@@ -18,6 +18,7 @@ from ..dependencies import get_config_manager
from ..core.migration_engine import MigrationEngine
from ..core.database import SessionLocal
from ..models.mapping import DatabaseMapping, Environment
from ..core.mapping_service import IdMappingService
from ..core.task_manager.context import TaskContext
# [DEF:MigrationPlugin:Class]
@@ -165,11 +166,11 @@ class MigrationPlugin(PluginBase):
superset_log = log.with_source("superset_api") if context else log
migration_log = log.with_source("migration") if context else log
log.info("Starting migration task.")
log.debug(f"Params: {params}")
try:
with belief_scope("execute"):
log.info("Starting migration task.")
log.debug(f"Params: {params}")
try:
with belief_scope("execute"):
config_manager = get_config_manager()
environments = config_manager.get_environments()
@@ -192,20 +193,20 @@ class MigrationPlugin(PluginBase):
from_env_name = src_env.name
to_env_name = tgt_env.name
log.info(f"Resolved environments: {from_env_name} -> {to_env_name}")
migration_result = {
"status": "SUCCESS",
"source_environment": from_env_name,
"target_environment": to_env_name,
"selected_dashboards": 0,
"migrated_dashboards": [],
"failed_dashboards": [],
"mapping_count": 0
}
from_c = SupersetClient(src_env)
to_c = SupersetClient(tgt_env)
log.info(f"Resolved environments: {from_env_name} -> {to_env_name}")
migration_result = {
"status": "SUCCESS",
"source_environment": from_env_name,
"target_environment": to_env_name,
"selected_dashboards": 0,
"migrated_dashboards": [],
"failed_dashboards": [],
"mapping_count": 0
}
from_c = SupersetClient(src_env)
to_c = SupersetClient(tgt_env)
if not from_c or not to_c:
raise ValueError(f"Clients not initialized for environments: {from_env_name}, {to_env_name}")
@@ -213,24 +214,24 @@ class MigrationPlugin(PluginBase):
_, all_dashboards = from_c.get_dashboards()
dashboards_to_migrate = []
if selected_ids:
dashboards_to_migrate = [d for d in all_dashboards if d["id"] in selected_ids]
elif dashboard_regex:
regex_str = str(dashboard_regex)
dashboards_to_migrate = [
if selected_ids:
dashboards_to_migrate = [d for d in all_dashboards if d["id"] in selected_ids]
elif dashboard_regex:
regex_str = str(dashboard_regex)
dashboards_to_migrate = [
d for d in all_dashboards if re.search(regex_str, d["dashboard_title"], re.IGNORECASE)
]
else:
log.warning("No selection criteria provided (selected_ids or dashboard_regex).")
migration_result["status"] = "NO_SELECTION"
return migration_result
if not dashboards_to_migrate:
log.warning("No dashboards found matching criteria.")
migration_result["status"] = "NO_MATCHES"
return migration_result
migration_result["selected_dashboards"] = len(dashboards_to_migrate)
else:
log.warning("No selection criteria provided (selected_ids or dashboard_regex).")
migration_result["status"] = "NO_SELECTION"
return migration_result
if not dashboards_to_migrate:
log.warning("No dashboards found matching criteria.")
migration_result["status"] = "NO_MATCHES"
return migration_result
migration_result["selected_dashboards"] = len(dashboards_to_migrate)
# Get mappings from params
db_mapping = params.get("db_mappings", {})
@@ -251,18 +252,18 @@ class MigrationPlugin(PluginBase):
DatabaseMapping.target_env_id == tgt_env_db.id
).all()
# Provided mappings override stored ones
stored_map_dict = {m.source_db_uuid: m.target_db_uuid for m in stored_mappings}
stored_map_dict.update(db_mapping)
db_mapping = stored_map_dict
log.info(f"Loaded {len(stored_mappings)} database mappings from database.")
finally:
db.close()
migration_result["mapping_count"] = len(db_mapping)
engine = MigrationEngine()
for dash in dashboards_to_migrate:
dash_id, dash_slug, title = dash["id"], dash.get("slug"), dash["dashboard_title"]
stored_map_dict = {m.source_db_uuid: m.target_db_uuid for m in stored_mappings}
stored_map_dict.update(db_mapping)
db_mapping = stored_map_dict
log.info(f"Loaded {len(stored_mappings)} database mappings from database.")
finally:
db.close()
migration_result["mapping_count"] = len(db_mapping)
engine = MigrationEngine()
for dash in dashboards_to_migrate:
dash_id, dash_slug, title = dash["id"], dash.get("slug"), dash["dashboard_title"]
try:
exported_content, _ = from_c.export_dashboard(dash_id)
@@ -293,22 +294,22 @@ class MigrationPlugin(PluginBase):
db.close()
success = engine.transform_zip(str(tmp_zip_path), str(tmp_new_zip), db_mapping, strip_databases=False)
if success:
to_c.import_dashboard(file_name=tmp_new_zip, dash_id=dash_id, dash_slug=dash_slug)
migration_result["migrated_dashboards"].append({
"id": dash_id,
"title": title
})
else:
migration_log.error(f"Failed to transform ZIP for dashboard {title}")
migration_result["failed_dashboards"].append({
"id": dash_id,
"title": title,
"error": "Failed to transform ZIP"
})
superset_log.info(f"Dashboard {title} imported.")
except Exception as exc:
if success:
to_c.import_dashboard(file_name=tmp_new_zip, dash_id=dash_id, dash_slug=dash_slug)
migration_result["migrated_dashboards"].append({
"id": dash_id,
"title": title
})
else:
migration_log.error(f"Failed to transform ZIP for dashboard {title}")
migration_result["failed_dashboards"].append({
"id": dash_id,
"title": title,
"error": "Failed to transform ZIP"
})
superset_log.info(f"Dashboard {title} imported.")
except Exception as exc:
# Check for password error
error_msg = str(exc)
# The error message from Superset is often a JSON string inside a string.
@@ -347,34 +348,45 @@ class MigrationPlugin(PluginBase):
passwords = task.params.get("passwords", {})
# Retry import with password
if passwords:
app_logger.info(f"[MigrationPlugin][Action] Retrying import for {title} with provided passwords.")
to_c.import_dashboard(file_name=tmp_new_zip, dash_id=dash_id, dash_slug=dash_slug, passwords=passwords)
app_logger.info(f"[MigrationPlugin][Success] Dashboard {title} imported after password injection.")
migration_result["migrated_dashboards"].append({
"id": dash_id,
"title": title
})
# Clear passwords from params after use for security
if "passwords" in task.params:
del task.params["passwords"]
continue
app_logger.error(f"[MigrationPlugin][Failure] Failed to migrate dashboard {title}: {exc}", exc_info=True)
migration_result["failed_dashboards"].append({
"id": dash_id,
"title": title,
"error": str(exc)
})
app_logger.info("[MigrationPlugin][Exit] Migration finished.")
if migration_result["failed_dashboards"]:
migration_result["status"] = "PARTIAL_SUCCESS"
return migration_result
except Exception as e:
app_logger.critical(f"[MigrationPlugin][Failure] Fatal error during migration: {e}", exc_info=True)
raise e
if passwords:
app_logger.info(f"[MigrationPlugin][Action] Retrying import for {title} with provided passwords.")
to_c.import_dashboard(file_name=tmp_new_zip, dash_id=dash_id, dash_slug=dash_slug, passwords=passwords)
app_logger.info(f"[MigrationPlugin][Success] Dashboard {title} imported after password injection.")
migration_result["migrated_dashboards"].append({
"id": dash_id,
"title": title
})
# Clear passwords from params after use for security
if "passwords" in task.params:
del task.params["passwords"]
continue
app_logger.error(f"[MigrationPlugin][Failure] Failed to migrate dashboard {title}: {exc}", exc_info=True)
migration_result["failed_dashboards"].append({
"id": dash_id,
"title": title,
"error": str(exc)
})
app_logger.info("[MigrationPlugin][Exit] Migration finished.")
if migration_result["failed_dashboards"]:
migration_result["status"] = "PARTIAL_SUCCESS"
# Perform incremental sync to rapidly update local mappings with new imported resources
try:
db_session = SessionLocal()
mapping_service = IdMappingService(db_session)
mapping_service.sync_environment(tgt_env.id, to_c, incremental=True)
db_session.close()
log.info(f"[MigrationPlugin][Action] Completed incremental sync for target environment {to_env_name}")
except Exception as sync_exc:
log.error(f"[MigrationPlugin][Error] Failed incremental sync for {to_env_name}: {sync_exc}")
return migration_result
except Exception as e:
app_logger.critical(f"[MigrationPlugin][Failure] Fatal error during migration: {e}", exc_info=True)
raise e
# [/DEF:MigrationPlugin.execute:Action]
# [/DEF:execute:Function]
# [/DEF:MigrationPlugin:Class]
# [/DEF:MigrationPlugin:Module]
# [/DEF:MigrationPlugin:Module]