dry run migration
This commit is contained in:
139
backend/src/core/migration/archive_parser.py
Normal file
139
backend/src/core/migration/archive_parser.py
Normal file
@@ -0,0 +1,139 @@
|
||||
# [DEF:backend.src.core.migration.archive_parser:Module]
|
||||
# @TIER: STANDARD
|
||||
# @SEMANTICS: migration, zip, parser, yaml, metadata
|
||||
# @PURPOSE: Parse Superset export ZIP archives into normalized object catalogs for diffing.
|
||||
# @LAYER: Core
|
||||
# @RELATION: DEPENDS_ON -> backend.src.core.logger
|
||||
# @INVARIANT: Parsing is read-only and never mutates archive files.
|
||||
|
||||
import json
|
||||
import tempfile
|
||||
import zipfile
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
import yaml
|
||||
|
||||
from ..logger import logger, belief_scope
|
||||
|
||||
|
||||
# [DEF:MigrationArchiveParser:Class]
# @PURPOSE: Extract normalized dashboards/charts/datasets metadata from ZIP archives.
class MigrationArchiveParser:
    """Parse Superset export ZIP archives into normalized object catalogs.

    Parsing is read-only: the archive is extracted into a throwaway
    temporary directory and the archive file itself is never mutated.
    """

    # [DEF:extract_objects_from_zip:Function]
    # @PURPOSE: Extract object catalogs from Superset archive.
    # @PRE: zip_path points to a valid readable ZIP.
    # @POST: Returns object lists grouped by resource type.
    # @RETURN: Dict[str, List[Dict[str, Any]]]
    def extract_objects_from_zip(self, zip_path: str) -> Dict[str, List[Dict[str, Any]]]:
        """Extract dashboards/charts/datasets catalogs from a Superset export ZIP.

        Args:
            zip_path: Filesystem path to a readable ZIP archive.

        Returns:
            Mapping with keys ``dashboards``, ``charts`` and ``datasets``,
            each a list of normalized descriptors produced by
            ``_normalize_object_payload``.
        """
        with belief_scope("MigrationArchiveParser.extract_objects_from_zip"):
            result: Dict[str, List[Dict[str, Any]]] = {
                "dashboards": [],
                "charts": [],
                "datasets": [],
            }
            # Extract into a temp dir so parsing never touches the archive itself
            # (invariant: read-only). The directory is removed on scope exit.
            with tempfile.TemporaryDirectory() as temp_dir_str:
                temp_dir = Path(temp_dir_str)
                with zipfile.ZipFile(zip_path, "r") as zip_file:
                    zip_file.extractall(temp_dir)

                # One collection pass per resource type (dict preserves key order).
                for object_type in result:
                    result[object_type] = self._collect_yaml_objects(temp_dir, object_type)

            return result
    # [/DEF:extract_objects_from_zip:Function]

    # [DEF:_collect_yaml_objects:Function]
    # @PURPOSE: Read and normalize YAML manifests for one object type.
    # @PRE: object_type is one of dashboards/charts/datasets.
    # @POST: Returns only valid normalized objects.
    def _collect_yaml_objects(self, root_dir: Path, object_type: str) -> List[Dict[str, Any]]:
        """Load every ``*.yaml`` manifest under ``object_type`` directories.

        Invalid or unparsable manifests are skipped (logged), never fatal.

        Args:
            root_dir: Root of the extracted archive.
            object_type: Resource directory name (``dashboards``/``charts``/``datasets``).

        Returns:
            Normalized descriptors for every parseable manifest, in
            deterministic (sorted-path) order.
        """
        with belief_scope("MigrationArchiveParser._collect_yaml_objects"):
            # The two patterns overlap (``**`` also matches zero directories),
            # so deduplicate with set(); sort so the output order is
            # deterministic instead of hash-order (FIX: was ``set(files)``).
            files = list(root_dir.glob(f"**/{object_type}/**/*.yaml")) + list(root_dir.glob(f"**/{object_type}/*.yaml"))
            objects: List[Dict[str, Any]] = []
            for file_path in sorted(set(files)):
                try:
                    # FIX: explicit UTF-8 — Superset manifests may contain
                    # non-ASCII; the platform default encoding (e.g. cp1252
                    # on Windows) would mis-decode or raise.
                    with open(file_path, "r", encoding="utf-8") as file_obj:
                        payload = yaml.safe_load(file_obj) or {}
                    normalized = self._normalize_object_payload(payload, object_type)
                    if normalized:
                        objects.append(normalized)
                except Exception as exc:
                    # Deliberate best-effort: one bad manifest must not abort
                    # the whole archive; record and continue.
                    logger.reflect(
                        "[MigrationArchiveParser._collect_yaml_objects][REFLECT] skip_invalid_yaml path=%s error=%s",
                        file_path,
                        exc,
                    )
            return objects
    # [/DEF:_collect_yaml_objects:Function]

    # [DEF:_normalize_object_payload:Function]
    # @PURPOSE: Convert raw YAML payload to stable diff signature shape.
    # @PRE: payload is parsed YAML mapping.
    # @POST: Returns normalized descriptor with `uuid`, `title`, and `signature`.
    def _normalize_object_payload(self, payload: Dict[str, Any], object_type: str) -> Optional[Dict[str, Any]]:
        """Normalize one parsed manifest into a diffable descriptor.

        Args:
            payload: Parsed YAML mapping for a single object.
            object_type: One of ``dashboards``/``charts``/``datasets``.

        Returns:
            A dict with ``uuid``, ``title`` and a ``signature`` (JSON string
            with sorted keys, so byte-equality implies field-equality), plus
            per-type linkage fields — or ``None`` when the payload is not a
            mapping, lacks a ``uuid``, or the type is unknown.
        """
        with belief_scope("MigrationArchiveParser._normalize_object_payload"):
            # Guard clauses: only mappings that carry a uuid are diffable.
            if not isinstance(payload, dict):
                return None
            uuid = payload.get("uuid")
            if not uuid:
                return None

            if object_type == "dashboards":
                title = payload.get("dashboard_title") or payload.get("title")
                signature = {
                    "title": title,
                    "slug": payload.get("slug"),
                    "position_json": payload.get("position_json"),
                    "json_metadata": payload.get("json_metadata"),
                    "description": payload.get("description"),
                    "owners": payload.get("owners"),
                }
                return {
                    "uuid": str(uuid),
                    "title": title or f"Dashboard {uuid}",
                    # sort_keys gives a stable signature; default=str covers
                    # non-JSON-native YAML values (dates, etc.).
                    "signature": json.dumps(signature, sort_keys=True, default=str),
                    "owners": payload.get("owners") or [],
                }

            if object_type == "charts":
                title = payload.get("slice_name") or payload.get("name")
                signature = {
                    "title": title,
                    "viz_type": payload.get("viz_type"),
                    "params": payload.get("params"),
                    "query_context": payload.get("query_context"),
                    "datasource_uuid": payload.get("datasource_uuid"),
                    "dataset_uuid": payload.get("dataset_uuid"),
                }
                return {
                    "uuid": str(uuid),
                    "title": title or f"Chart {uuid}",
                    "signature": json.dumps(signature, sort_keys=True, default=str),
                    # Export format varies between key names; prefer datasource_uuid.
                    "dataset_uuid": payload.get("datasource_uuid") or payload.get("dataset_uuid"),
                }

            if object_type == "datasets":
                title = payload.get("table_name") or payload.get("name")
                signature = {
                    "title": title,
                    "schema": payload.get("schema"),
                    "database_uuid": payload.get("database_uuid"),
                    "sql": payload.get("sql"),
                    "columns": payload.get("columns"),
                    "metrics": payload.get("metrics"),
                }
                return {
                    "uuid": str(uuid),
                    "title": title or f"Dataset {uuid}",
                    "signature": json.dumps(signature, sort_keys=True, default=str),
                    "database_uuid": payload.get("database_uuid"),
                }

            # Unknown object_type: nothing to normalize.
            return None
    # [/DEF:_normalize_object_payload:Function]

# [/DEF:MigrationArchiveParser:Class]
|
||||
# [/DEF:backend.src.core.migration.archive_parser:Module]
|
||||
Reference in New Issue
Block a user