# [DEF:BackupPlugin:Module]
# @SEMANTICS: backup, superset, automation, dashboard, plugin
# @PURPOSE: A plugin that provides functionality to back up Superset dashboards.
# @LAYER: App
# @RELATION: IMPLEMENTS -> PluginBase
# @RELATION: DEPENDS_ON -> superset_tool.client
# @RELATION: DEPENDS_ON -> superset_tool.utils
# @RELATION: USES -> TaskContext

from pathlib import Path
from typing import Any, Dict, Optional

from requests.exceptions import RequestException

from ..core.logger import belief_scope, logger as app_logger
from ..core.plugin_base import PluginBase
from ..core.superset_client import SupersetClient
from ..core.task_manager.context import TaskContext
from ..core.utils.fileio import (
    RetentionPolicy,
    archive_exports,
    consolidate_archive_folders,
    remove_empty_directories,
    sanitize_filename,
    save_and_unpack_dashboard,
)
from ..core.utils.network import SupersetAPIError
from ..dependencies import get_config_manager

# [DEF:BackupPlugin:Class]
# @PURPOSE: Implementation of the backup plugin logic.
class BackupPlugin(PluginBase):
    """
    A plugin to back up Superset dashboards.
    """

    @property
    # [DEF:id:Function]
    # @PURPOSE: Returns the unique identifier for the backup plugin.
    # @PRE: Plugin instance exists.
    # @POST: Returns string ID.
    # @RETURN: str - "superset-backup"
    def id(self) -> str:
        with belief_scope("id"):
            return "superset-backup"
    # [/DEF:id:Function]

    @property
    # [DEF:name:Function]
    # @PURPOSE: Returns the human-readable name of the backup plugin.
    # @PRE: Plugin instance exists.
    # @POST: Returns string name.
    # @RETURN: str - Plugin name.
    def name(self) -> str:
        with belief_scope("name"):
            return "Superset Dashboard Backup"
    # [/DEF:name:Function]

    @property
    # [DEF:description:Function]
    # @PURPOSE: Returns a description of the backup plugin.
    # @PRE: Plugin instance exists.
    # @POST: Returns string description.
    # @RETURN: str - Plugin description.
    def description(self) -> str:
        with belief_scope("description"):
            return "Backs up all dashboards from a Superset instance."
    # [/DEF:description:Function]

    @property
    # [DEF:version:Function]
    # @PURPOSE: Returns the version of the backup plugin.
    # @PRE: Plugin instance exists.
    # @POST: Returns string version.
    # @RETURN: str - "1.0.0"
    def version(self) -> str:
        with belief_scope("version"):
            return "1.0.0"
    # [/DEF:version:Function]

    @property
    # [DEF:ui_route:Function]
    # @PURPOSE: Returns the frontend route for the backup plugin.
    # @RETURN: str - "/tools/backups"
    def ui_route(self) -> str:
        with belief_scope("ui_route"):
            return "/tools/backups"
    # [/DEF:ui_route:Function]

    # [DEF:get_schema:Function]
    # @PURPOSE: Returns the JSON schema for backup plugin parameters.
    # @PRE: Plugin instance exists.
    # @POST: Returns dictionary schema.
    # @RETURN: Dict[str, Any] - JSON schema.
    def get_schema(self) -> Dict[str, Any]:
        with belief_scope("get_schema"):
            config_manager = get_config_manager()
            # Offer the configured environment names as the enum choices.
            # (An empty list simply yields an empty enum.)
            envs = [e.name for e in config_manager.get_environments()]

            return {
                "type": "object",
                "properties": {
                    "env": {
                        "type": "string",
                        "title": "Environment",
                        "description": "The Superset environment to back up.",
                        "enum": envs,
                    },
                },
                "required": ["env"],
            }
    # [/DEF:get_schema:Function]

    # [DEF:execute:Function]
    # @PURPOSE: Executes the dashboard backup logic with TaskContext support.
    # @PARAM: params (Dict[str, Any]) - Backup parameters (env/environment_id, dashboard_ids).
    # @PARAM: context (Optional[TaskContext]) - Task context for logging with source attribution.
    # @PRE: Target environment must be configured. params must be a dictionary.
    # @POST: All selected dashboards are exported and archived.
    # @RAISE: KeyError - If no environment name can be resolved from params.
    # @RAISE: ValueError - If no environments are configured or the named one is missing.
    # @RETURN: Dict[str, Any] - Backup summary (status, counts, per-dashboard results).
    async def execute(self, params: Dict[str, Any], context: Optional[TaskContext] = None):
        with belief_scope("execute"):
            config_manager = get_config_manager()

            # Support both parameter names: environment_id (for task creation)
            # and env (for direct calls); same for dashboard_ids/dashboards.
            env_id = params.get("environment_id") or params.get("env")
            dashboard_ids = params.get("dashboard_ids") or params.get("dashboards")

            # Use the TaskContext logger (source-attributed) when available,
            # otherwise fall back to the application logger.
            log = context.logger if context else app_logger
            log.info(f"Backup parameters received: env_id={env_id}, dashboard_ids={dashboard_ids}")

            # Resolve environment name if an environment id was provided.
            if env_id:
                env_config = next((e for e in config_manager.get_environments() if e.id == env_id), None)
                if env_config:
                    # NOTE: intentionally mutates the caller-supplied dict;
                    # the name is read back from params["env"] just below.
                    params["env"] = env_config.name

            env = params.get("env")
            if not env:
                raise KeyError("env")

            log.info(f"Backup started for environment: {env}, selected dashboards: {dashboard_ids}")

            storage_settings = config_manager.get_config().settings.storage
            # Use 'backups' subfolder within the storage root.
            backup_path = Path(storage_settings.root_path) / "backups"

            # Sub-loggers attribute output to specific components when a context exists.
            superset_log = log.with_source("superset_api") if context else log
            storage_log = log.with_source("storage") if context else log

            log.info(f"Starting backup for environment: {env}")

            try:
                if not config_manager.has_environments():
                    raise ValueError("No Superset environments configured. Please add an environment in Settings.")

                env_config = config_manager.get_environment(env)
                if not env_config:
                    raise ValueError(f"Environment '{env}' not found in configuration.")

                client = SupersetClient(env_config)

                # Fetch the full dashboard inventory for the environment.
                all_dashboard_count, all_dashboard_meta = client.get_dashboards()
                superset_log.info(f"Found {all_dashboard_count} total dashboards in environment")

                # Filter dashboards if specific IDs were requested.
                if dashboard_ids:
                    dashboard_ids_int = [int(did) for did in dashboard_ids]
                    dashboard_meta = [db for db in all_dashboard_meta if db.get('id') in dashboard_ids_int]
                    dashboard_count = len(dashboard_meta)
                    superset_log.info(f"Filtered to {dashboard_count} selected dashboards: {dashboard_ids_int}")
                else:
                    dashboard_count = all_dashboard_count
                    superset_log.info("No dashboard filter applied - backing up all dashboards")
                    dashboard_meta = all_dashboard_meta

                if dashboard_count == 0:
                    log.info("No dashboards to back up")
                    return {
                        "status": "NO_DASHBOARDS",
                        "environment": env,
                        "backup_root": str(backup_path / env.upper()),
                        "total_dashboards": 0,
                        "backed_up_dashboards": 0,
                        "failed_dashboards": 0,
                        "dashboards": [],
                        "failures": []
                    }

                total = len(dashboard_meta)
                backed_up_dashboards = []
                failed_dashboards = []
                for idx, db in enumerate(dashboard_meta, 1):
                    dashboard_id = db.get('id')
                    dashboard_title = db.get('dashboard_title', 'Unknown Dashboard')
                    if not dashboard_id:
                        # Skip malformed metadata entries that carry no id.
                        continue

                    # Report progress as a percentage of processed dashboards.
                    progress_pct = (idx / total) * 100
                    log.progress(f"Backing up dashboard: {dashboard_title}", percent=progress_pct)

                    try:
                        dashboard_base_dir_name = sanitize_filename(f"{dashboard_title}")
                        dashboard_dir = backup_path / env.upper() / dashboard_base_dir_name
                        dashboard_dir.mkdir(parents=True, exist_ok=True)

                        zip_content, filename = client.export_dashboard(dashboard_id)
                        superset_log.debug(f"Exported dashboard: {dashboard_title}")

                        # Persist the raw export archive; unpack=False keeps the zip intact.
                        save_and_unpack_dashboard(
                            zip_content=zip_content,
                            original_filename=filename,
                            output_dir=dashboard_dir,
                            unpack=False
                        )

                        archive_exports(str(dashboard_dir), policy=RetentionPolicy())
                        storage_log.debug(f"Archived dashboard: {dashboard_title}")
                        backed_up_dashboards.append({
                            "id": dashboard_id,
                            "title": dashboard_title,
                            "path": str(dashboard_dir)
                        })

                    except (SupersetAPIError, RequestException, IOError, OSError) as db_error:
                        # One failed dashboard must not abort the whole backup run.
                        log.error(f"Failed to export dashboard {dashboard_title} (ID: {dashboard_id}): {db_error}")
                        failed_dashboards.append({
                            "id": dashboard_id,
                            "title": dashboard_title,
                            "error": str(db_error)
                        })
                        continue

                # Normalize the on-disk layout after all exports are written.
                consolidate_archive_folders(backup_path / env.upper())
                remove_empty_directories(str(backup_path / env.upper()))

                log.info(f"Backup completed successfully for {env}")
                return {
                    "status": "SUCCESS" if not failed_dashboards else "PARTIAL_SUCCESS",
                    "environment": env,
                    "backup_root": str(backup_path / env.upper()),
                    "total_dashboards": total,
                    "backed_up_dashboards": len(backed_up_dashboards),
                    "failed_dashboards": len(failed_dashboards),
                    "dashboards": backed_up_dashboards,
                    "failures": failed_dashboards
                }

            except (RequestException, IOError, KeyError) as e:
                log.error(f"Fatal error during backup for {env}: {e}")
                # Bare raise preserves the original traceback for the caller.
                raise
    # [/DEF:execute:Function]
# [/DEF:BackupPlugin:Class]
# [/DEF:BackupPlugin:Module]
|