ready for test

This commit is contained in:
2026-02-25 13:35:09 +03:00
parent 21e969a769
commit 33433c3173
11 changed files with 994 additions and 351 deletions

View File

@@ -11,28 +11,41 @@
import zipfile
import yaml
import os
import json
import re
import tempfile
from pathlib import Path
from typing import Dict
from typing import Dict, Optional, List
from .logger import logger, belief_scope
from src.core.mapping_service import IdMappingService
from src.models.mapping import ResourceType
# [/SECTION]
# [DEF:MigrationEngine:Class]
# @PURPOSE: Engine for transforming Superset export ZIPs.
class MigrationEngine:
# [DEF:__init__:Function]
# @PURPOSE: Initializes the migration engine with optional ID mapping service.
# @PARAM: mapping_service (Optional[IdMappingService]) - Used for resolving target environment integer IDs.
def __init__(self, mapping_service: Optional[IdMappingService] = None):
self.mapping_service = mapping_service
# [/DEF:__init__:Function]
# [DEF:transform_zip:Function]
# @PURPOSE: Extracts ZIP, replaces database UUIDs in YAMLs, and re-packages.
# @PURPOSE: Extracts ZIP, replaces database UUIDs in YAMLs, patches cross-filters, and re-packages.
# @PARAM: zip_path (str) - Path to the source ZIP file.
# @PARAM: output_path (str) - Path where the transformed ZIP will be saved.
# @PARAM: db_mapping (Dict[str, str]) - Mapping of source UUID to target UUID.
# @PARAM: strip_databases (bool) - Whether to remove the databases directory from the archive.
# @PARAM: target_env_id (Optional[str]) - Used if fix_cross_filters is True to know which environment map to use.
# @PARAM: fix_cross_filters (bool) - Whether to patch dashboard json_metadata.
# @PRE: zip_path must point to a valid Superset export archive.
# @POST: Transformed archive is saved to output_path.
# @RETURN: bool - True if successful.
def transform_zip(self, zip_path: str, output_path: str, db_mapping: Dict[str, str], strip_databases: bool = True) -> bool:
def transform_zip(self, zip_path: str, output_path: str, db_mapping: Dict[str, str], strip_databases: bool = True, target_env_id: Optional[str] = None, fix_cross_filters: bool = False) -> bool:
"""
Transform a Superset export ZIP by replacing database UUIDs.
Transform a Superset export ZIP by replacing database UUIDs and optionally fixing cross-filters.
"""
with belief_scope("MigrationEngine.transform_zip"):
with tempfile.TemporaryDirectory() as temp_dir_str:
@@ -44,8 +57,7 @@ class MigrationEngine:
with zipfile.ZipFile(zip_path, 'r') as zf:
zf.extractall(temp_dir)
# 2. Transform YAMLs
# Datasets are usually in datasets/*.yaml
# 2. Transform YAMLs (Databases)
dataset_files = list(temp_dir.glob("**/datasets/**/*.yaml")) + list(temp_dir.glob("**/datasets/*.yaml"))
dataset_files = list(set(dataset_files))
@@ -54,6 +66,20 @@ class MigrationEngine:
logger.info(f"[MigrationEngine.transform_zip][Action] Transforming dataset: {ds_file}")
self._transform_yaml(ds_file, db_mapping)
# 2.5 Patch Cross-Filters (Dashboards)
if fix_cross_filters and self.mapping_service and target_env_id:
dash_files = list(temp_dir.glob("**/dashboards/**/*.yaml")) + list(temp_dir.glob("**/dashboards/*.yaml"))
dash_files = list(set(dash_files))
logger.info(f"[MigrationEngine.transform_zip][State] Found {len(dash_files)} dashboard files for patching.")
# Gather all source UUID-to-ID mappings from the archive first
source_id_to_uuid_map = self._extract_chart_uuids_from_archive(temp_dir)
for dash_file in dash_files:
logger.info(f"[MigrationEngine.transform_zip][Action] Patching dashboard: {dash_file}")
self._patch_dashboard_metadata(dash_file, target_env_id, source_id_to_uuid_map)
# 3. Re-package
logger.info(f"[MigrationEngine.transform_zip][Action] Re-packaging ZIP to: {output_path} (strip_databases={strip_databases})")
with zipfile.ZipFile(output_path, 'w', zipfile.ZIP_DEFLATED) as zf:
@@ -97,6 +123,100 @@ class MigrationEngine:
yaml.dump(data, f)
# [/DEF:_transform_yaml:Function]
# [DEF:_extract_chart_uuids_from_archive:Function]
# @PURPOSE: Scans the unpacked ZIP to map local exported integer IDs back to their UUIDs.
# @PARAM: temp_dir (Path) - Root dir of unpacked archive
# @RETURN: Dict[int, str] - Mapping of source Integer ID to UUID.
def _extract_chart_uuids_from_archive(self, temp_dir: Path) -> Dict[int, str]:
# Implementation Note: This is a placeholder for the logic that extracts
# actual Source IDs. In a real scenario, this involves parsing chart YAMLs
# or manifesting the export metadata structure where source IDs are stored.
# For simplicity in US1 MVP, we assume it's read from chart files if present.
mapping = {}
chart_files = list(temp_dir.glob("**/charts/**/*.yaml")) + list(temp_dir.glob("**/charts/*.yaml"))
for cf in set(chart_files):
try:
with open(cf, 'r') as f:
cdata = yaml.safe_load(f)
if cdata and 'id' in cdata and 'uuid' in cdata:
mapping[cdata['id']] = cdata['uuid']
except Exception:
pass
return mapping
# [/DEF:_extract_chart_uuids_from_archive:Function]
# [DEF:_patch_dashboard_metadata:Function]
# @PURPOSE: Replaces integer IDs in json_metadata.
# @PARAM: file_path (Path)
# @PARAM: target_env_id (str)
# @PARAM: source_map (Dict[int, str])
def _patch_dashboard_metadata(self, file_path: Path, target_env_id: str, source_map: Dict[int, str]):
with belief_scope("MigrationEngine._patch_dashboard_metadata"):
try:
with open(file_path, 'r') as f:
data = yaml.safe_load(f)
if not data or 'json_metadata' not in data:
return
metadata_str = data['json_metadata']
if not metadata_str:
return
metadata = json.loads(metadata_str)
modified = False
# We need to deeply traverse and replace. For MVP, string replacement over the raw JSON is an option,
# but careful dict traversal is safer.
# Fetch target UUIDs for everything we know:
uuids_needed = list(source_map.values())
target_ids = self.mapping_service.get_remote_ids_batch(target_env_id, ResourceType.CHART, uuids_needed)
if not target_ids:
logger.info("[MigrationEngine._patch_dashboard_metadata][Reflect] No remote target IDs found in mapping database.")
return
# Map Source Int -> Target Int
source_to_target = {}
missing_targets = []
for s_id, s_uuid in source_map.items():
if s_uuid in target_ids:
source_to_target[s_id] = target_ids[s_uuid]
else:
missing_targets.append(s_id)
if missing_targets:
logger.warning(f"[MigrationEngine._patch_dashboard_metadata][Coherence:Recoverable] Missing target IDs for source IDs: {missing_targets}. Cross-filters for these IDs might break.")
if not source_to_target:
logger.info("[MigrationEngine._patch_dashboard_metadata][Reflect] No source IDs matched remotely. Skipping patch.")
return
# Complex metadata traversal would go here (e.g. for native_filter_configuration)
# We use regex replacement over the string for safety over unknown nested dicts.
new_metadata_str = metadata_str
# Replace chartId and datasetId assignments explicitly.
# Pattern: "datasetId": 42 or "chartId": 42
for s_id, t_id in source_to_target.items():
# Replace in native_filter_configuration targets
new_metadata_str = re.sub(r'("datasetId"\s*:\s*)' + str(s_id) + r'(\b)', r'\g<1>' + str(t_id) + r'\g<2>', new_metadata_str)
new_metadata_str = re.sub(r'("chartId"\s*:\s*)' + str(s_id) + r'(\b)', r'\g<1>' + str(t_id) + r'\g<2>', new_metadata_str)
# Re-parse to validate valid JSON
data['json_metadata'] = json.dumps(json.loads(new_metadata_str))
with open(file_path, 'w') as f:
yaml.dump(data, f)
logger.info(f"[MigrationEngine._patch_dashboard_metadata][Reason] Re-serialized modified JSON metadata for dashboard.")
except Exception as e:
logger.error(f"[MigrationEngine._patch_dashboard_metadata][Coherence:Failed] Metadata patch failed: {e}")
# [/DEF:_patch_dashboard_metadata:Function]
# [/DEF:MigrationEngine:Class]
# [/DEF:backend.src.core.migration_engine:Module]