feat: Implement recursive storage listing and directory browsing for backups, and add a migration option to fix cross-filters.

2026-02-25 20:01:33 +03:00
parent 5d42a6b930
commit f9ac282596
12 changed files with 533 additions and 53 deletions
--- a/backend/src/api/routes/assistant.py
+++ b/backend/src/api/routes/assistant.py
@@ -851,9 +851,9 @@ def _build_tool_catalog(current_user: User, config_manager: ConfigManager, db: S
        {
            "operation": "execute_migration",
            "domain": "migration",
-            "description": "Run dashboard migration (id/slug/title) between environments",
+            "description": "Run dashboard migration (id/slug/title) between environments. Optional boolean flags: replace_db_config, fix_cross_filters",
            "required_entities": ["source_env", "target_env"],
-            "optional_entities": ["dashboard_id", "dashboard_ref"],
+            "optional_entities": ["dashboard_id", "dashboard_ref", "replace_db_config", "fix_cross_filters"],
            "risk_level": "guarded",
            "requires_confirmation": False,
        },
@@ -1208,20 +1208,30 @@ async def _dispatch_intent(
    if operation == "execute_migration":
        _check_any_permission(current_user, [("plugin:migration", "EXECUTE"), ("plugin:superset-migration", "EXECUTE")])
        src_token = entities.get("source_env")
+        dashboard_ref = entities.get("dashboard_ref")
        dashboard_id = _resolve_dashboard_id_entity(entities, config_manager, env_hint=src_token)
        src = _resolve_env_id(src_token, config_manager)
        tgt = _resolve_env_id(entities.get("target_env"), config_manager)
-        if not dashboard_id or not src or not tgt:
-            raise HTTPException(status_code=422, detail="Missing dashboard_id/dashboard_ref/source_env/target_env")
+        if not src or not tgt:
+            raise HTTPException(status_code=422, detail="Missing source_env/target_env")
+        if not dashboard_id and not dashboard_ref:
+            raise HTTPException(status_code=422, detail="Missing dashboard_id/dashboard_ref")
+
+        migration_params: Dict[str, Any] = {
+            "source_env_id": src,
+            "target_env_id": tgt,
+            "replace_db_config": _coerce_query_bool(entities.get("replace_db_config", False)),
+            "fix_cross_filters": _coerce_query_bool(entities.get("fix_cross_filters", True)),
+        }
+        if dashboard_id:
+            migration_params["selected_ids"] = [dashboard_id]
+        else:
+            # Fallback: pass dashboard_ref as regex for the migration plugin to match
+            migration_params["dashboard_regex"] = str(dashboard_ref)

        task = await task_manager.create_task(
            plugin_id="superset-migration",
-            params={
-                "selected_ids": [dashboard_id],
-                "source_env_id": src,
-                "target_env_id": tgt,
-                "replace_db_config": False,
-            },
+            params=migration_params,
            user_id=current_user.id,
        )
        return (
--- a/backend/src/api/routes/storage.py
+++ b/backend/src/api/routes/storage.py
@@ -36,6 +36,7 @@ router = APIRouter(tags=["storage"])
 async def list_files(
    category: Optional[FileCategory] = None,
    path: Optional[str] = None,
+    recursive: bool = False,
    plugin_loader=Depends(get_plugin_loader),
    _ = Depends(has_permission("plugin:storage", "READ"))
 ):
@@ -43,7 +44,7 @@ async def list_files(
        storage_plugin: StoragePlugin = plugin_loader.get_plugin("storage-manager")
        if not storage_plugin:
            raise HTTPException(status_code=500, detail="Storage plugin not loaded")
-        return storage_plugin.list_files(category, path)
+        return storage_plugin.list_files(category, path, recursive)
 # [/DEF:list_files:Function]

 # [DEF:upload_file:Function]
@@ -143,4 +144,4 @@ async def download_file(
            raise HTTPException(status_code=400, detail=str(e))
 # [/DEF:download_file:Function]

-# [/DEF:storage_routes:Module]
+# [/DEF:storage_routes:Module]
--- a/backend/src/plugins/migration.py
+++ b/backend/src/plugins/migration.py
@@ -150,6 +150,7 @@ class MigrationPlugin(PluginBase):
        dashboard_regex = params.get("dashboard_regex")
        
        replace_db_config = params.get("replace_db_config", False)
+        fix_cross_filters = params.get("fix_cross_filters", True)
        params.get("from_db_id")
        params.get("to_db_id")
        
@@ -217,9 +218,9 @@ class MigrationPlugin(PluginBase):
            if selected_ids:
                dashboards_to_migrate = [d for d in all_dashboards if d["id"] in selected_ids]
            elif dashboard_regex:
-                regex_str = str(dashboard_regex)
+                regex_pattern = re.compile(str(dashboard_regex), re.IGNORECASE)
                dashboards_to_migrate = [
-                    d for d in all_dashboards if re.search(regex_str, d["dashboard_title"], re.IGNORECASE)
+                    d for d in all_dashboards if regex_pattern.search(d.get("dashboard_title", ""))
                ]
            else:
                log.warning("No selection criteria provided (selected_ids or dashboard_regex).")
@@ -270,7 +271,14 @@ class MigrationPlugin(PluginBase):
                    with create_temp_file(content=exported_content, dry_run=True, suffix=".zip") as tmp_zip_path:
                        # Always transform to strip databases to avoid password errors
                        with create_temp_file(suffix=".zip", dry_run=True) as tmp_new_zip:
-                            success = engine.transform_zip(str(tmp_zip_path), str(tmp_new_zip), db_mapping, strip_databases=False)
+                            success = engine.transform_zip(
+                                str(tmp_zip_path), 
+                                str(tmp_new_zip), 
+                                db_mapping, 
+                                strip_databases=False, 
+                                target_env_id=tgt_env.id if tgt_env else None, 
+                                fix_cross_filters=fix_cross_filters
+                            )
                            
                            if not success and replace_db_config:
                                # Signal missing mapping and wait (only if we care about mappings)
@@ -283,16 +291,23 @@ class MigrationPlugin(PluginBase):
                                    # (Mappings would be updated in task.params by resolve_task)
                                    db = SessionLocal()
                                    try:
-                                        src_env = db.query(Environment).filter(Environment.name == from_env_name).first()
-                                        tgt_env = db.query(Environment).filter(Environment.name == to_env_name).first()
+                                        src_env_rt = db.query(Environment).filter(Environment.name == from_env_name).first()
+                                        tgt_env_rt = db.query(Environment).filter(Environment.name == to_env_name).first()
                                        mappings = db.query(DatabaseMapping).filter(
-                                            DatabaseMapping.source_env_id == src_env.id,
-                                            DatabaseMapping.target_env_id == tgt_env.id
+                                            DatabaseMapping.source_env_id == src_env_rt.id,
+                                            DatabaseMapping.target_env_id == tgt_env_rt.id
                                        ).all()
                                        db_mapping = {m.source_db_uuid: m.target_db_uuid for m in mappings}
                                    finally:
                                        db.close()
-                                    success = engine.transform_zip(str(tmp_zip_path), str(tmp_new_zip), db_mapping, strip_databases=False)
+                                    success = engine.transform_zip(
+                                        str(tmp_zip_path), 
+                                        str(tmp_new_zip), 
+                                        db_mapping, 
+                                        strip_databases=False,
+                                        target_env_id=tgt_env.id if tgt_env else None,
+                                        fix_cross_filters=fix_cross_filters
+                                    )

                            if success:
                                to_c.import_dashboard(file_name=tmp_new_zip, dash_id=dash_id, dash_slug=dash_slug)
--- a/backend/src/plugins/storage/plugin.py
+++ b/backend/src/plugins/storage/plugin.py
@@ -212,13 +212,21 @@ class StoragePlugin(PluginBase):
    # @PURPOSE: Lists all files and directories in a specific category and subpath.
    # @PARAM: category (Optional[FileCategory]) - The category to list.
    # @PARAM: subpath (Optional[str]) - Nested path within the category.
+    # @PARAM: recursive (bool) - Whether to scan nested subdirectories recursively.
    # @PRE: Storage root must exist.
    # @POST: Returns a list of StoredFile objects.
    # @RETURN: List[StoredFile] - List of file and directory metadata objects.
-    def list_files(self, category: Optional[FileCategory] = None, subpath: Optional[str] = None) -> List[StoredFile]:
+    def list_files(
+        self,
+        category: Optional[FileCategory] = None,
+        subpath: Optional[str] = None,
+        recursive: bool = False,
+    ) -> List[StoredFile]:
        with belief_scope("StoragePlugin:list_files"):
            root = self.get_storage_root()
-            logger.info(f"[StoragePlugin][Action] Listing files in root: {root}, category: {category}, subpath: {subpath}")
+            logger.info(
+                f"[StoragePlugin][Action] Listing files in root: {root}, category: {category}, subpath: {subpath}, recursive: {recursive}"
+            )
            files = []
            
            categories = [category] if category else list(FileCategory)
@@ -235,17 +243,37 @@ class StoragePlugin(PluginBase):
                    continue
                
                logger.debug(f"[StoragePlugin][Action] Scanning directory: {target_dir}")
-                
+
+                if recursive:
+                    for current_root, dirs, filenames in os.walk(target_dir):
+                        dirs[:] = [d for d in dirs if "Logs" not in d]
+                        for filename in filenames:
+                            file_path = Path(current_root) / filename
+                            if "Logs" in str(file_path):
+                                continue
+                            stat = file_path.stat()
+                            files.append(
+                                StoredFile(
+                                    name=filename,
+                                    path=str(file_path.relative_to(root)),
+                                    size=stat.st_size,
+                                    created_at=datetime.fromtimestamp(stat.st_ctime),
+                                    category=cat,
+                                    mime_type=None,
+                                )
+                            )
+                    continue
+
                # Use os.scandir for better performance and to distinguish files vs dirs
                with os.scandir(target_dir) as it:
                    for entry in it:
                        # Skip logs
                        if "Logs" in entry.path:
                            continue
-                        
+
                        stat = entry.stat()
                        is_dir = entry.is_dir()
-                        
+
                        files.append(StoredFile(
                            name=entry.name,
                            path=str(Path(entry.path).relative_to(root)),
@@ -341,4 +369,4 @@ class StoragePlugin(PluginBase):
    # [/DEF:get_file_path:Function]

 # [/DEF:StoragePlugin:Class]
-# [/DEF:StoragePlugin:Module]
+# [/DEF:StoragePlugin:Module]
--- a/backend/src/scripts/seed_superset_load_test.py
+++ b/backend/src/scripts/seed_superset_load_test.py
@@ -0,0 +1,297 @@
+# [DEF:backend.src.scripts.seed_superset_load_test:Module]
+#
+# @TIER: STANDARD
+# @SEMANTICS: superset, load-test, charts, dashboards, seed, stress
+# @PURPOSE: Creates randomized load-test data in Superset by cloning chart configurations and creating dashboards in target environments.
+# @LAYER: Scripts
+# @RELATION: USES -> backend.src.core.config_manager.ConfigManager
+# @RELATION: USES -> backend.src.core.superset_client.SupersetClient
+# @INVARIANT: Created chart and dashboard names are globally unique for one script run.
+
+# [SECTION: IMPORTS]
+import argparse
+import json
+import random
+import sys
+import uuid
+from pathlib import Path
+from typing import Dict, List, Optional
+
+sys.path.append(str(Path(__file__).parent.parent.parent))
+
+from src.core.config_manager import ConfigManager
+from src.core.config_models import Environment
+from src.core.logger import belief_scope, logger
+from src.core.superset_client import SupersetClient
+# [/SECTION]
+
+
+# [DEF:_parse_args:Function]
+# @PURPOSE: Parses CLI arguments for load-test data generation.
+# @PRE: Script is called from CLI.
+# @POST: Returns validated argument namespace.
+def _parse_args() -> argparse.Namespace:
+    parser = argparse.ArgumentParser(description="Seed Superset with load-test charts and dashboards")
+    parser.add_argument("--envs", nargs="+", default=["ss1", "ss2"], help="Target environment IDs")
+    parser.add_argument("--charts", type=int, default=10000, help="Target number of charts to create")
+    parser.add_argument("--dashboards", type=int, default=500, help="Target number of dashboards to create")
+    parser.add_argument("--template-pool-size", type=int, default=200, help="How many source charts to sample as templates per env")
+    parser.add_argument("--seed", type=int, default=None, help="Optional RNG seed for reproducibility")
+    parser.add_argument("--max-errors", type=int, default=100, help="Stop early if errors exceed this threshold")
+    parser.add_argument("--dry-run", action="store_true", help="Do not write data, only validate setup")
+    return parser.parse_args()
+# [/DEF:_parse_args:Function]
+
+
+# [DEF:_extract_result_payload:Function]
+# @PURPOSE: Normalizes Superset API payloads that may be wrapped in `result`.
+# @PRE: payload is a JSON-decoded API response.
+# @POST: Returns the unwrapped object when present.
+def _extract_result_payload(payload: Dict) -> Dict:
+    result = payload.get("result")
+    if isinstance(result, dict):
+        return result
+    return payload
+# [/DEF:_extract_result_payload:Function]
+
+
+# [DEF:_extract_created_id:Function]
+# @PURPOSE: Extracts object ID from create/update API response.
+# @PRE: payload is a JSON-decoded API response.
+# @POST: Returns integer object ID or None if missing.
+def _extract_created_id(payload: Dict) -> Optional[int]:
+    direct_id = payload.get("id")
+    if isinstance(direct_id, int):
+        return direct_id
+    result = payload.get("result")
+    if isinstance(result, dict) and isinstance(result.get("id"), int):
+        return int(result["id"])
+    return None
+# [/DEF:_extract_created_id:Function]
+
+
+# [DEF:_generate_unique_name:Function]
+# @PURPOSE: Generates globally unique random names for charts/dashboards.
+# @PRE: used_names is mutable set for collision tracking.
+# @POST: Returns a unique string and stores it in used_names.
+def _generate_unique_name(prefix: str, used_names: set[str], rng: random.Random) -> str:
+    adjectives = ["amber", "rapid", "frozen", "delta", "lunar", "vector", "cobalt", "silent", "neon", "solar"]
+    nouns = ["falcon", "matrix", "signal", "harbor", "stream", "vertex", "bridge", "orbit", "pulse", "forge"]
+    while True:
+        token = uuid.uuid4().hex[:8]
+        candidate = f"{prefix}_{rng.choice(adjectives)}_{rng.choice(nouns)}_{rng.randint(100, 999)}_{token}"
+        if candidate not in used_names:
+            used_names.add(candidate)
+            return candidate
+# [/DEF:_generate_unique_name:Function]
+
+
+# [DEF:_resolve_target_envs:Function]
+# @PURPOSE: Resolves requested environment IDs from configuration.
+# @PRE: env_ids is non-empty.
+# @POST: Returns mapping env_id -> configured environment object.
+def _resolve_target_envs(env_ids: List[str]) -> Dict[str, Environment]:
+    config_manager = ConfigManager()
+    configured = {env.id: env for env in config_manager.get_environments()}
+    resolved: Dict[str, Environment] = {}
+
+    if not configured:
+        for config_path in [Path("config.json"), Path("backend/config.json")]:
+            if not config_path.exists():
+                continue
+            try:
+                payload = json.loads(config_path.read_text(encoding="utf-8"))
+                env_rows = payload.get("environments", [])
+                for row in env_rows:
+                    env = Environment(**row)
+                    configured[env.id] = env
+            except Exception as exc:
+                logger.warning(f"[REFLECT] Failed loading environments from {config_path}: {exc}")
+
+    for env_id in env_ids:
+        env = configured.get(env_id)
+        if env is None:
+            raise ValueError(f"Environment '{env_id}' not found in configuration")
+        resolved[env_id] = env
+
+    return resolved
+# [/DEF:_resolve_target_envs:Function]
+
+
+# [DEF:_build_chart_template_pool:Function]
+# @PURPOSE: Builds a pool of source chart templates to clone in one environment.
+# @PRE: Client is authenticated.
+# @POST: Returns non-empty list of chart payload templates.
+def _build_chart_template_pool(client: SupersetClient, pool_size: int, rng: random.Random) -> List[Dict]:
+    list_query = {
+        "page": 0,
+        "page_size": 1000,
+        "columns": ["id", "slice_name", "datasource_id", "datasource_type", "viz_type", "params", "query_context"],
+    }
+    rows = client.network.fetch_paginated_data(
+        endpoint="/chart/",
+        pagination_options={"base_query": list_query, "results_field": "result"},
+    )
+
+    candidates = [row for row in rows if isinstance(row, dict) and row.get("id")]
+    if not candidates:
+        raise RuntimeError("No source charts available for templating")
+
+    selected = candidates if len(candidates) <= pool_size else rng.sample(candidates, pool_size)
+    templates: List[Dict] = []
+
+    for row in selected:
+        chart_id = int(row["id"])
+        detail_payload = client.get_chart(chart_id)
+        detail = _extract_result_payload(detail_payload)
+
+        datasource_id = detail.get("datasource_id")
+        datasource_type = detail.get("datasource_type") or row.get("datasource_type") or "table"
+        if datasource_id is None:
+            continue
+
+        params_value = detail.get("params")
+        if isinstance(params_value, dict):
+            params_value = json.dumps(params_value)
+
+        query_context_value = detail.get("query_context")
+        if isinstance(query_context_value, dict):
+            query_context_value = json.dumps(query_context_value)
+
+        templates.append(
+            {
+                "datasource_id": int(datasource_id),
+                "datasource_type": str(datasource_type),
+                "viz_type": detail.get("viz_type") or row.get("viz_type"),
+                "params": params_value,
+                "query_context": query_context_value,
+            }
+        )
+
+    if not templates:
+        raise RuntimeError("Could not build templates with datasource metadata")
+
+    return templates
+# [/DEF:_build_chart_template_pool:Function]
+
+
+# [DEF:seed_superset_load_data:Function]
+# @PURPOSE: Creates dashboards and cloned charts for load testing across target environments.
+# @PRE: Target environments must be reachable and authenticated.
+# @POST: Returns execution statistics dictionary.
+# @SIDE_EFFECT: Creates objects in Superset environments.
+def seed_superset_load_data(args: argparse.Namespace) -> Dict:
+    rng = random.Random(args.seed)
+    env_map = _resolve_target_envs(args.envs)
+
+    clients: Dict[str, SupersetClient] = {}
+    templates_by_env: Dict[str, List[Dict]] = {}
+    created_dashboards: Dict[str, List[int]] = {env_id: [] for env_id in env_map}
+    created_charts: Dict[str, List[int]] = {env_id: [] for env_id in env_map}
+    used_chart_names: set[str] = set()
+    used_dashboard_names: set[str] = set()
+
+    for env_id, env in env_map.items():
+        client = SupersetClient(env)
+        client.authenticate()
+        clients[env_id] = client
+        templates_by_env[env_id] = _build_chart_template_pool(client, args.template_pool_size, rng)
+        logger.info(f"[REASON] Environment {env_id}: loaded {len(templates_by_env[env_id])} chart templates")
+
+    errors = 0
+    env_ids = list(env_map.keys())
+
+    for idx in range(args.dashboards):
+        env_id = env_ids[idx % len(env_ids)] if idx < len(env_ids) else rng.choice(env_ids)
+        dashboard_title = _generate_unique_name("lt_dash", used_dashboard_names, rng)
+
+        if args.dry_run:
+            logger.info(f"[REFLECT] Dry-run dashboard create: env={env_id}, title={dashboard_title}")
+            continue
+
+        try:
+            payload = {"dashboard_title": dashboard_title, "published": False}
+            created = clients[env_id].network.request("POST", "/dashboard/", data=json.dumps(payload))
+            dashboard_id = _extract_created_id(created)
+            if dashboard_id is None:
+                raise RuntimeError(f"Dashboard create response missing id: {created}")
+            created_dashboards[env_id].append(dashboard_id)
+        except Exception as exc:
+            errors += 1
+            logger.error(f"[EXPLORE] Failed creating dashboard in {env_id}: {exc}")
+            if errors >= args.max_errors:
+                raise RuntimeError(f"Stopping due to max errors reached ({errors})") from exc
+
+    if args.dry_run:
+        return {
+            "dry_run": True,
+            "templates_by_env": {k: len(v) for k, v in templates_by_env.items()},
+            "charts_target": args.charts,
+            "dashboards_target": args.dashboards,
+        }
+
+    for env_id in env_ids:
+        if not created_dashboards[env_id]:
+            raise RuntimeError(f"No dashboards created in environment {env_id}; cannot bind charts")
+
+    for index in range(args.charts):
+        env_id = rng.choice(env_ids)
+        client = clients[env_id]
+        template = rng.choice(templates_by_env[env_id])
+        dashboard_id = rng.choice(created_dashboards[env_id])
+        chart_name = _generate_unique_name("lt_chart", used_chart_names, rng)
+
+        payload = {
+            "slice_name": chart_name,
+            "datasource_id": template["datasource_id"],
+            "datasource_type": template["datasource_type"],
+            "dashboards": [dashboard_id],
+        }
+        if template.get("viz_type"):
+            payload["viz_type"] = template["viz_type"]
+        if template.get("params"):
+            payload["params"] = template["params"]
+        if template.get("query_context"):
+            payload["query_context"] = template["query_context"]
+
+        try:
+            created = client.network.request("POST", "/chart/", data=json.dumps(payload))
+            chart_id = _extract_created_id(created)
+            if chart_id is None:
+                raise RuntimeError(f"Chart create response missing id: {created}")
+            created_charts[env_id].append(chart_id)
+
+            if (index + 1) % 500 == 0:
+                logger.info(f"[REASON] Created {index + 1}/{args.charts} charts")
+        except Exception as exc:
+            errors += 1
+            logger.error(f"[EXPLORE] Failed creating chart in {env_id}: {exc}")
+            if errors >= args.max_errors:
+                raise RuntimeError(f"Stopping due to max errors reached ({errors})") from exc
+
+    return {
+        "dry_run": False,
+        "errors": errors,
+        "dashboards": {env_id: len(ids) for env_id, ids in created_dashboards.items()},
+        "charts": {env_id: len(ids) for env_id, ids in created_charts.items()},
+        "total_dashboards": sum(len(ids) for ids in created_dashboards.values()),
+        "total_charts": sum(len(ids) for ids in created_charts.values()),
+    }
+# [/DEF:seed_superset_load_data:Function]
+
+
+# [DEF:main:Function]
+# @PURPOSE: CLI entrypoint for Superset load-test data seeding.
+# @PRE: Command line arguments are valid.
+# @POST: Prints summary and exits with non-zero status on failure.
+def main() -> None:
+    with belief_scope("seed_superset_load_test.main"):
+        args = _parse_args()
+        result = seed_superset_load_data(args)
+        logger.info(f"[COHERENCE:OK] Result summary: {json.dumps(result, ensure_ascii=True)}")
+
+
+if __name__ == "__main__":
+    main()
+
+# [/DEF:backend.src.scripts.seed_superset_load_test:Module]