feat: Implement recursive storage listing and directory browsing for backups, and add a migration option to fix cross-filters.

This commit is contained in:
2026-02-25 20:01:33 +03:00
parent 5d42a6b930
commit f9ac282596
12 changed files with 533 additions and 53 deletions

View File

@@ -851,9 +851,9 @@ def _build_tool_catalog(current_user: User, config_manager: ConfigManager, db: S
{
"operation": "execute_migration",
"domain": "migration",
"description": "Run dashboard migration (id/slug/title) between environments",
"description": "Run dashboard migration (id/slug/title) between environments. Optional boolean flags: replace_db_config, fix_cross_filters",
"required_entities": ["source_env", "target_env"],
"optional_entities": ["dashboard_id", "dashboard_ref"],
"optional_entities": ["dashboard_id", "dashboard_ref", "replace_db_config", "fix_cross_filters"],
"risk_level": "guarded",
"requires_confirmation": False,
},
@@ -1208,20 +1208,30 @@ async def _dispatch_intent(
if operation == "execute_migration":
_check_any_permission(current_user, [("plugin:migration", "EXECUTE"), ("plugin:superset-migration", "EXECUTE")])
src_token = entities.get("source_env")
dashboard_ref = entities.get("dashboard_ref")
dashboard_id = _resolve_dashboard_id_entity(entities, config_manager, env_hint=src_token)
src = _resolve_env_id(src_token, config_manager)
tgt = _resolve_env_id(entities.get("target_env"), config_manager)
if not dashboard_id or not src or not tgt:
raise HTTPException(status_code=422, detail="Missing dashboard_id/dashboard_ref/source_env/target_env")
if not src or not tgt:
raise HTTPException(status_code=422, detail="Missing source_env/target_env")
if not dashboard_id and not dashboard_ref:
raise HTTPException(status_code=422, detail="Missing dashboard_id/dashboard_ref")
migration_params: Dict[str, Any] = {
"source_env_id": src,
"target_env_id": tgt,
"replace_db_config": _coerce_query_bool(entities.get("replace_db_config", False)),
"fix_cross_filters": _coerce_query_bool(entities.get("fix_cross_filters", True)),
}
if dashboard_id:
migration_params["selected_ids"] = [dashboard_id]
else:
# Fallback: pass dashboard_ref as regex for the migration plugin to match
migration_params["dashboard_regex"] = str(dashboard_ref)
task = await task_manager.create_task(
plugin_id="superset-migration",
params={
"selected_ids": [dashboard_id],
"source_env_id": src,
"target_env_id": tgt,
"replace_db_config": False,
},
params=migration_params,
user_id=current_user.id,
)
return (

View File

@@ -36,6 +36,7 @@ router = APIRouter(tags=["storage"])
async def list_files(
category: Optional[FileCategory] = None,
path: Optional[str] = None,
recursive: bool = False,
plugin_loader=Depends(get_plugin_loader),
_ = Depends(has_permission("plugin:storage", "READ"))
):
@@ -43,7 +44,7 @@ async def list_files(
storage_plugin: StoragePlugin = plugin_loader.get_plugin("storage-manager")
if not storage_plugin:
raise HTTPException(status_code=500, detail="Storage plugin not loaded")
return storage_plugin.list_files(category, path)
return storage_plugin.list_files(category, path, recursive)
# [/DEF:list_files:Function]
# [DEF:upload_file:Function]
@@ -143,4 +144,4 @@ async def download_file(
raise HTTPException(status_code=400, detail=str(e))
# [/DEF:download_file:Function]
# [/DEF:storage_routes:Module]
# [/DEF:storage_routes:Module]

View File

@@ -150,6 +150,7 @@ class MigrationPlugin(PluginBase):
dashboard_regex = params.get("dashboard_regex")
replace_db_config = params.get("replace_db_config", False)
fix_cross_filters = params.get("fix_cross_filters", True)
params.get("from_db_id")
params.get("to_db_id")
@@ -217,9 +218,9 @@ class MigrationPlugin(PluginBase):
if selected_ids:
dashboards_to_migrate = [d for d in all_dashboards if d["id"] in selected_ids]
elif dashboard_regex:
regex_str = str(dashboard_regex)
regex_pattern = re.compile(str(dashboard_regex), re.IGNORECASE)
dashboards_to_migrate = [
d for d in all_dashboards if re.search(regex_str, d["dashboard_title"], re.IGNORECASE)
d for d in all_dashboards if regex_pattern.search(d.get("dashboard_title", ""))
]
else:
log.warning("No selection criteria provided (selected_ids or dashboard_regex).")
@@ -270,7 +271,14 @@ class MigrationPlugin(PluginBase):
with create_temp_file(content=exported_content, dry_run=True, suffix=".zip") as tmp_zip_path:
# Always transform to strip databases to avoid password errors
with create_temp_file(suffix=".zip", dry_run=True) as tmp_new_zip:
success = engine.transform_zip(str(tmp_zip_path), str(tmp_new_zip), db_mapping, strip_databases=False)
success = engine.transform_zip(
str(tmp_zip_path),
str(tmp_new_zip),
db_mapping,
strip_databases=False,
target_env_id=tgt_env.id if tgt_env else None,
fix_cross_filters=fix_cross_filters
)
if not success and replace_db_config:
# Signal missing mapping and wait (only if we care about mappings)
@@ -283,16 +291,23 @@ class MigrationPlugin(PluginBase):
# (Mappings would be updated in task.params by resolve_task)
db = SessionLocal()
try:
src_env = db.query(Environment).filter(Environment.name == from_env_name).first()
tgt_env = db.query(Environment).filter(Environment.name == to_env_name).first()
src_env_rt = db.query(Environment).filter(Environment.name == from_env_name).first()
tgt_env_rt = db.query(Environment).filter(Environment.name == to_env_name).first()
mappings = db.query(DatabaseMapping).filter(
DatabaseMapping.source_env_id == src_env.id,
DatabaseMapping.target_env_id == tgt_env.id
DatabaseMapping.source_env_id == src_env_rt.id,
DatabaseMapping.target_env_id == tgt_env_rt.id
).all()
db_mapping = {m.source_db_uuid: m.target_db_uuid for m in mappings}
finally:
db.close()
success = engine.transform_zip(str(tmp_zip_path), str(tmp_new_zip), db_mapping, strip_databases=False)
success = engine.transform_zip(
str(tmp_zip_path),
str(tmp_new_zip),
db_mapping,
strip_databases=False,
target_env_id=tgt_env.id if tgt_env else None,
fix_cross_filters=fix_cross_filters
)
if success:
to_c.import_dashboard(file_name=tmp_new_zip, dash_id=dash_id, dash_slug=dash_slug)

View File

@@ -212,13 +212,21 @@ class StoragePlugin(PluginBase):
# @PURPOSE: Lists all files and directories in a specific category and subpath.
# @PARAM: category (Optional[FileCategory]) - The category to list.
# @PARAM: subpath (Optional[str]) - Nested path within the category.
# @PARAM: recursive (bool) - Whether to scan nested subdirectories recursively.
# @PRE: Storage root must exist.
# @POST: Returns a list of StoredFile objects.
# @RETURN: List[StoredFile] - List of file and directory metadata objects.
def list_files(self, category: Optional[FileCategory] = None, subpath: Optional[str] = None) -> List[StoredFile]:
def list_files(
self,
category: Optional[FileCategory] = None,
subpath: Optional[str] = None,
recursive: bool = False,
) -> List[StoredFile]:
with belief_scope("StoragePlugin:list_files"):
root = self.get_storage_root()
logger.info(f"[StoragePlugin][Action] Listing files in root: {root}, category: {category}, subpath: {subpath}")
logger.info(
f"[StoragePlugin][Action] Listing files in root: {root}, category: {category}, subpath: {subpath}, recursive: {recursive}"
)
files = []
categories = [category] if category else list(FileCategory)
@@ -235,17 +243,37 @@ class StoragePlugin(PluginBase):
continue
logger.debug(f"[StoragePlugin][Action] Scanning directory: {target_dir}")
if recursive:
for current_root, dirs, filenames in os.walk(target_dir):
dirs[:] = [d for d in dirs if "Logs" not in d]
for filename in filenames:
file_path = Path(current_root) / filename
if "Logs" in str(file_path):
continue
stat = file_path.stat()
files.append(
StoredFile(
name=filename,
path=str(file_path.relative_to(root)),
size=stat.st_size,
created_at=datetime.fromtimestamp(stat.st_ctime),
category=cat,
mime_type=None,
)
)
continue
# Use os.scandir for better performance and to distinguish files vs dirs
with os.scandir(target_dir) as it:
for entry in it:
# Skip logs
if "Logs" in entry.path:
continue
stat = entry.stat()
is_dir = entry.is_dir()
files.append(StoredFile(
name=entry.name,
path=str(Path(entry.path).relative_to(root)),
@@ -341,4 +369,4 @@ class StoragePlugin(PluginBase):
# [/DEF:get_file_path:Function]
# [/DEF:StoragePlugin:Class]
# [/DEF:StoragePlugin:Module]
# [/DEF:StoragePlugin:Module]

View File

@@ -0,0 +1,297 @@
# [DEF:backend.src.scripts.seed_superset_load_test:Module]
#
# @TIER: STANDARD
# @SEMANTICS: superset, load-test, charts, dashboards, seed, stress
# @PURPOSE: Creates randomized load-test data in Superset by cloning chart configurations and creating dashboards in target environments.
# @LAYER: Scripts
# @RELATION: USES -> backend.src.core.config_manager.ConfigManager
# @RELATION: USES -> backend.src.core.superset_client.SupersetClient
# @INVARIANT: Created chart and dashboard names are globally unique for one script run.
# [SECTION: IMPORTS]
import argparse
import json
import random
import sys
import uuid
from pathlib import Path
from typing import Dict, List, Optional
sys.path.append(str(Path(__file__).parent.parent.parent))
from src.core.config_manager import ConfigManager
from src.core.config_models import Environment
from src.core.logger import belief_scope, logger
from src.core.superset_client import SupersetClient
# [/SECTION]
# [DEF:_parse_args:Function]
# @PURPOSE: Parses CLI arguments for load-test data generation.
# @PRE: Script is called from CLI.
# @POST: Returns validated argument namespace.
def _parse_args() -> argparse.Namespace:
parser = argparse.ArgumentParser(description="Seed Superset with load-test charts and dashboards")
parser.add_argument("--envs", nargs="+", default=["ss1", "ss2"], help="Target environment IDs")
parser.add_argument("--charts", type=int, default=10000, help="Target number of charts to create")
parser.add_argument("--dashboards", type=int, default=500, help="Target number of dashboards to create")
parser.add_argument("--template-pool-size", type=int, default=200, help="How many source charts to sample as templates per env")
parser.add_argument("--seed", type=int, default=None, help="Optional RNG seed for reproducibility")
parser.add_argument("--max-errors", type=int, default=100, help="Stop early if errors exceed this threshold")
parser.add_argument("--dry-run", action="store_true", help="Do not write data, only validate setup")
return parser.parse_args()
# [/DEF:_parse_args:Function]
# [DEF:_extract_result_payload:Function]
# @PURPOSE: Normalizes Superset API payloads that may be wrapped in `result`.
# @PRE: payload is a JSON-decoded API response.
# @POST: Returns the unwrapped object when present.
def _extract_result_payload(payload: Dict) -> Dict:
result = payload.get("result")
if isinstance(result, dict):
return result
return payload
# [/DEF:_extract_result_payload:Function]
# [DEF:_extract_created_id:Function]
# @PURPOSE: Extracts object ID from create/update API response.
# @PRE: payload is a JSON-decoded API response.
# @POST: Returns integer object ID or None if missing.
def _extract_created_id(payload: Dict) -> Optional[int]:
direct_id = payload.get("id")
if isinstance(direct_id, int):
return direct_id
result = payload.get("result")
if isinstance(result, dict) and isinstance(result.get("id"), int):
return int(result["id"])
return None
# [/DEF:_extract_created_id:Function]
# [DEF:_generate_unique_name:Function]
# @PURPOSE: Generates globally unique random names for charts/dashboards.
# @PRE: used_names is mutable set for collision tracking.
# @POST: Returns a unique string and stores it in used_names.
def _generate_unique_name(prefix: str, used_names: set[str], rng: random.Random) -> str:
adjectives = ["amber", "rapid", "frozen", "delta", "lunar", "vector", "cobalt", "silent", "neon", "solar"]
nouns = ["falcon", "matrix", "signal", "harbor", "stream", "vertex", "bridge", "orbit", "pulse", "forge"]
while True:
token = uuid.uuid4().hex[:8]
candidate = f"{prefix}_{rng.choice(adjectives)}_{rng.choice(nouns)}_{rng.randint(100, 999)}_{token}"
if candidate not in used_names:
used_names.add(candidate)
return candidate
# [/DEF:_generate_unique_name:Function]
# [DEF:_resolve_target_envs:Function]
# @PURPOSE: Resolves requested environment IDs from configuration.
# @PRE: env_ids is non-empty.
# @POST: Returns mapping env_id -> configured environment object.
def _resolve_target_envs(env_ids: List[str]) -> Dict[str, Environment]:
config_manager = ConfigManager()
configured = {env.id: env for env in config_manager.get_environments()}
resolved: Dict[str, Environment] = {}
if not configured:
for config_path in [Path("config.json"), Path("backend/config.json")]:
if not config_path.exists():
continue
try:
payload = json.loads(config_path.read_text(encoding="utf-8"))
env_rows = payload.get("environments", [])
for row in env_rows:
env = Environment(**row)
configured[env.id] = env
except Exception as exc:
logger.warning(f"[REFLECT] Failed loading environments from {config_path}: {exc}")
for env_id in env_ids:
env = configured.get(env_id)
if env is None:
raise ValueError(f"Environment '{env_id}' not found in configuration")
resolved[env_id] = env
return resolved
# [/DEF:_resolve_target_envs:Function]
# [DEF:_build_chart_template_pool:Function]
# @PURPOSE: Builds a pool of source chart templates to clone in one environment.
# @PRE: Client is authenticated.
# @POST: Returns non-empty list of chart payload templates.
def _build_chart_template_pool(client: SupersetClient, pool_size: int, rng: random.Random) -> List[Dict]:
list_query = {
"page": 0,
"page_size": 1000,
"columns": ["id", "slice_name", "datasource_id", "datasource_type", "viz_type", "params", "query_context"],
}
rows = client.network.fetch_paginated_data(
endpoint="/chart/",
pagination_options={"base_query": list_query, "results_field": "result"},
)
candidates = [row for row in rows if isinstance(row, dict) and row.get("id")]
if not candidates:
raise RuntimeError("No source charts available for templating")
selected = candidates if len(candidates) <= pool_size else rng.sample(candidates, pool_size)
templates: List[Dict] = []
for row in selected:
chart_id = int(row["id"])
detail_payload = client.get_chart(chart_id)
detail = _extract_result_payload(detail_payload)
datasource_id = detail.get("datasource_id")
datasource_type = detail.get("datasource_type") or row.get("datasource_type") or "table"
if datasource_id is None:
continue
params_value = detail.get("params")
if isinstance(params_value, dict):
params_value = json.dumps(params_value)
query_context_value = detail.get("query_context")
if isinstance(query_context_value, dict):
query_context_value = json.dumps(query_context_value)
templates.append(
{
"datasource_id": int(datasource_id),
"datasource_type": str(datasource_type),
"viz_type": detail.get("viz_type") or row.get("viz_type"),
"params": params_value,
"query_context": query_context_value,
}
)
if not templates:
raise RuntimeError("Could not build templates with datasource metadata")
return templates
# [/DEF:_build_chart_template_pool:Function]
# [DEF:seed_superset_load_data:Function]
# @PURPOSE: Creates dashboards and cloned charts for load testing across target environments.
# @PRE: Target environments must be reachable and authenticated.
# @POST: Returns execution statistics dictionary.
# @SIDE_EFFECT: Creates objects in Superset environments.
def seed_superset_load_data(args: argparse.Namespace) -> Dict:
rng = random.Random(args.seed)
env_map = _resolve_target_envs(args.envs)
clients: Dict[str, SupersetClient] = {}
templates_by_env: Dict[str, List[Dict]] = {}
created_dashboards: Dict[str, List[int]] = {env_id: [] for env_id in env_map}
created_charts: Dict[str, List[int]] = {env_id: [] for env_id in env_map}
used_chart_names: set[str] = set()
used_dashboard_names: set[str] = set()
for env_id, env in env_map.items():
client = SupersetClient(env)
client.authenticate()
clients[env_id] = client
templates_by_env[env_id] = _build_chart_template_pool(client, args.template_pool_size, rng)
logger.info(f"[REASON] Environment {env_id}: loaded {len(templates_by_env[env_id])} chart templates")
errors = 0
env_ids = list(env_map.keys())
for idx in range(args.dashboards):
env_id = env_ids[idx % len(env_ids)] if idx < len(env_ids) else rng.choice(env_ids)
dashboard_title = _generate_unique_name("lt_dash", used_dashboard_names, rng)
if args.dry_run:
logger.info(f"[REFLECT] Dry-run dashboard create: env={env_id}, title={dashboard_title}")
continue
try:
payload = {"dashboard_title": dashboard_title, "published": False}
created = clients[env_id].network.request("POST", "/dashboard/", data=json.dumps(payload))
dashboard_id = _extract_created_id(created)
if dashboard_id is None:
raise RuntimeError(f"Dashboard create response missing id: {created}")
created_dashboards[env_id].append(dashboard_id)
except Exception as exc:
errors += 1
logger.error(f"[EXPLORE] Failed creating dashboard in {env_id}: {exc}")
if errors >= args.max_errors:
raise RuntimeError(f"Stopping due to max errors reached ({errors})") from exc
if args.dry_run:
return {
"dry_run": True,
"templates_by_env": {k: len(v) for k, v in templates_by_env.items()},
"charts_target": args.charts,
"dashboards_target": args.dashboards,
}
for env_id in env_ids:
if not created_dashboards[env_id]:
raise RuntimeError(f"No dashboards created in environment {env_id}; cannot bind charts")
for index in range(args.charts):
env_id = rng.choice(env_ids)
client = clients[env_id]
template = rng.choice(templates_by_env[env_id])
dashboard_id = rng.choice(created_dashboards[env_id])
chart_name = _generate_unique_name("lt_chart", used_chart_names, rng)
payload = {
"slice_name": chart_name,
"datasource_id": template["datasource_id"],
"datasource_type": template["datasource_type"],
"dashboards": [dashboard_id],
}
if template.get("viz_type"):
payload["viz_type"] = template["viz_type"]
if template.get("params"):
payload["params"] = template["params"]
if template.get("query_context"):
payload["query_context"] = template["query_context"]
try:
created = client.network.request("POST", "/chart/", data=json.dumps(payload))
chart_id = _extract_created_id(created)
if chart_id is None:
raise RuntimeError(f"Chart create response missing id: {created}")
created_charts[env_id].append(chart_id)
if (index + 1) % 500 == 0:
logger.info(f"[REASON] Created {index + 1}/{args.charts} charts")
except Exception as exc:
errors += 1
logger.error(f"[EXPLORE] Failed creating chart in {env_id}: {exc}")
if errors >= args.max_errors:
raise RuntimeError(f"Stopping due to max errors reached ({errors})") from exc
return {
"dry_run": False,
"errors": errors,
"dashboards": {env_id: len(ids) for env_id, ids in created_dashboards.items()},
"charts": {env_id: len(ids) for env_id, ids in created_charts.items()},
"total_dashboards": sum(len(ids) for ids in created_dashboards.values()),
"total_charts": sum(len(ids) for ids in created_charts.values()),
}
# [/DEF:seed_superset_load_data:Function]
# [DEF:main:Function]
# @PURPOSE: CLI entrypoint for Superset load-test data seeding.
# @PRE: Command line arguments are valid.
# @POST: Prints summary and exits with non-zero status on failure.
def main() -> None:
with belief_scope("seed_superset_load_test.main"):
args = _parse_args()
result = seed_superset_load_data(args)
logger.info(f"[COHERENCE:OK] Result summary: {json.dumps(result, ensure_ascii=True)}")
if __name__ == "__main__":
main()
# [/DEF:backend.src.scripts.seed_superset_load_test:Module]