semantic update

This commit is contained in:
2026-02-08 22:53:54 +03:00
parent e6087bd3c1
commit 235b0e3c9f
71 changed files with 68034 additions and 62417 deletions

View File

@@ -1,5 +1,6 @@
# [DEF:backend.src.api.routes.environments:Module]
#
# @TIER: STANDARD
# @SEMANTICS: api, environments, superset, databases
# @PURPOSE: API endpoints for listing environments and their databases.
# @LAYER: API

View File

@@ -1,5 +1,6 @@
# [DEF:backend.src.api.routes.git:Module]
#
# @TIER: STANDARD
# @SEMANTICS: git, routes, api, fastapi, repository, deployment
# @PURPOSE: Provides FastAPI endpoints for Git integration operations.
# @LAYER: API

View File

@@ -1,5 +1,6 @@
# [DEF:backend.src.api.routes.mappings:Module]
#
# @TIER: STANDARD
# @SEMANTICS: api, mappings, database, fuzzy-matching
# @PURPOSE: API endpoints for managing database mappings and getting suggestions.
# @LAYER: API

View File

@@ -1,4 +1,5 @@
# [DEF:backend.src.api.routes.migration:Module]
# @TIER: STANDARD
# @SEMANTICS: api, migration, dashboards
# @PURPOSE: API endpoints for migration operations.
# @LAYER: API

View File

@@ -1,4 +1,5 @@
# [DEF:PluginsRouter:Module]
# @TIER: STANDARD
# @SEMANTICS: api, router, plugins, list
# @PURPOSE: Defines the FastAPI router for plugin-related endpoints, allowing clients to list available plugins.
# @LAYER: UI (API)

View File

@@ -12,15 +12,25 @@
# [SECTION: IMPORTS]
from fastapi import APIRouter, Depends, HTTPException
from typing import List
from ...core.config_models import AppConfig, Environment, GlobalSettings
from pydantic import BaseModel
from ...core.config_models import AppConfig, Environment, GlobalSettings, LoggingConfig
from ...models.storage import StorageConfig
from ...dependencies import get_config_manager, has_permission
from ...core.config_manager import ConfigManager
from ...core.logger import logger, belief_scope
from ...core.logger import logger, belief_scope, get_task_log_level
from ...core.superset_client import SupersetClient
import os
# [/SECTION]
# [DEF:LoggingConfigResponse:Class]
# @PURPOSE: Response model for logging configuration with current task log level.
# @SEMANTICS: logging, config, response
class LoggingConfigResponse(BaseModel):
level: str
task_log_level: str
enable_belief_state: bool
# [/DEF:LoggingConfigResponse:Class]
router = APIRouter()
# [DEF:get_settings:Function]
@@ -223,5 +233,50 @@ async def test_environment_connection(
return {"status": "error", "message": str(e)}
# [/DEF:test_environment_connection:Function]
# [DEF:get_logging_config:Function]
# @PURPOSE: Retrieves current logging configuration.
# @PRE: Config manager is available.
# @POST: Returns logging configuration.
# @RETURN: LoggingConfigResponse - The current logging config.
@router.get("/logging", response_model=LoggingConfigResponse)
async def get_logging_config(
config_manager: ConfigManager = Depends(get_config_manager),
_ = Depends(has_permission("admin:settings", "READ"))
):
with belief_scope("get_logging_config"):
logging_config = config_manager.get_config().settings.logging
return LoggingConfigResponse(
level=logging_config.level,
task_log_level=logging_config.task_log_level,
enable_belief_state=logging_config.enable_belief_state
)
# [/DEF:get_logging_config:Function]
# [DEF:update_logging_config:Function]
# @PURPOSE: Updates logging configuration.
# @PRE: New logging config is provided.
# @POST: Logging configuration is updated and saved.
# @PARAM: config (LoggingConfig) - The new logging configuration.
# @RETURN: LoggingConfigResponse - The updated logging config.
@router.patch("/logging", response_model=LoggingConfigResponse)
async def update_logging_config(
config: LoggingConfig,
config_manager: ConfigManager = Depends(get_config_manager),
_ = Depends(has_permission("admin:settings", "WRITE"))
):
with belief_scope("update_logging_config"):
logger.info(f"[update_logging_config][Entry] Updating logging config: level={config.level}, task_log_level={config.task_log_level}")
# Get current settings and update logging config
settings = config_manager.get_config().settings
settings.logging = config
config_manager.update_global_settings(settings)
return LoggingConfigResponse(
level=config.level,
task_log_level=config.task_log_level,
enable_belief_state=config.enable_belief_state
)
# [/DEF:update_logging_config:Function]
# [/DEF:SettingsRouter:Module]

View File

@@ -1,5 +1,6 @@
# [DEF:storage_routes:Module]
#
# @TIER: STANDARD
# @SEMANTICS: storage, files, upload, download, backup, repository
# @PURPOSE: API endpoints for file storage management (backups and repositories).
# @LAYER: API

View File

@@ -1,14 +1,16 @@
# [DEF:TasksRouter:Module]
# @SEMANTICS: api, router, tasks, create, list, get
# @TIER: STANDARD
# @SEMANTICS: api, router, tasks, create, list, get, logs
# @PURPOSE: Defines the FastAPI router for task-related endpoints, allowing clients to create, list, and get the status of tasks.
# @LAYER: UI (API)
# @RELATION: Depends on the TaskManager. It is included by the main app.
from typing import List, Dict, Any, Optional
from fastapi import APIRouter, Depends, HTTPException, status
from pydantic import BaseModel
from fastapi import APIRouter, Depends, HTTPException, status, Query
from pydantic import BaseModel, Field
from ...core.logger import belief_scope
from ...core.task_manager import TaskManager, Task, TaskStatus, LogEntry
from ...core.task_manager.models import LogFilter, LogStats
from ...dependencies import get_task_manager, has_permission, get_current_user
router = APIRouter()
@@ -116,27 +118,93 @@ async def get_task(
@router.get("/{task_id}/logs", response_model=List[LogEntry])
# [DEF:get_task_logs:Function]
# @PURPOSE: Retrieve logs for a specific task.
# @PURPOSE: Retrieve logs for a specific task with optional filtering.
# @PARAM: task_id (str) - The unique identifier of the task.
# @PARAM: level (Optional[str]) - Filter by log level (DEBUG, INFO, WARNING, ERROR).
# @PARAM: source (Optional[str]) - Filter by source component.
# @PARAM: search (Optional[str]) - Text search in message.
# @PARAM: offset (int) - Number of logs to skip.
# @PARAM: limit (int) - Maximum number of logs to return.
# @PARAM: task_manager (TaskManager) - The task manager instance.
# @PRE: task_id must exist.
# @POST: Returns a list of log entries or raises 404.
# @RETURN: List[LogEntry] - List of log entries.
# @TIER: CRITICAL
async def get_task_logs(
task_id: str,
level: Optional[str] = Query(None, description="Filter by log level (DEBUG, INFO, WARNING, ERROR)"),
source: Optional[str] = Query(None, description="Filter by source component"),
search: Optional[str] = Query(None, description="Text search in message"),
offset: int = Query(0, ge=0, description="Number of logs to skip"),
limit: int = Query(100, ge=1, le=1000, description="Maximum number of logs to return"),
task_manager: TaskManager = Depends(get_task_manager),
_ = Depends(has_permission("tasks", "READ"))
):
"""
Retrieve logs for a specific task.
Retrieve logs for a specific task with optional filtering.
Supports filtering by level, source, and text search.
"""
with belief_scope("get_task_logs"):
task = task_manager.get_task(task_id)
if not task:
raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Task not found")
return task_manager.get_task_logs(task_id)
log_filter = LogFilter(
level=level.upper() if level else None,
source=source,
search=search,
offset=offset,
limit=limit
)
return task_manager.get_task_logs(task_id, log_filter)
# [/DEF:get_task_logs:Function]
@router.get("/{task_id}/logs/stats", response_model=LogStats)
# [DEF:get_task_log_stats:Function]
# @PURPOSE: Get statistics about logs for a task (counts by level and source).
# @PARAM: task_id (str) - The unique identifier of the task.
# @PARAM: task_manager (TaskManager) - The task manager instance.
# @PRE: task_id must exist.
# @POST: Returns log statistics or raises 404.
# @RETURN: LogStats - Statistics about task logs.
async def get_task_log_stats(
task_id: str,
task_manager: TaskManager = Depends(get_task_manager),
_ = Depends(has_permission("tasks", "READ"))
):
"""
Get statistics about logs for a task (counts by level and source).
"""
with belief_scope("get_task_log_stats"):
task = task_manager.get_task(task_id)
if not task:
raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Task not found")
return task_manager.get_task_log_stats(task_id)
# [/DEF:get_task_log_stats:Function]
@router.get("/{task_id}/logs/sources", response_model=List[str])
# [DEF:get_task_log_sources:Function]
# @PURPOSE: Get unique sources for a task's logs.
# @PARAM: task_id (str) - The unique identifier of the task.
# @PARAM: task_manager (TaskManager) - The task manager instance.
# @PRE: task_id must exist.
# @POST: Returns list of unique source names or raises 404.
# @RETURN: List[str] - Unique source names.
async def get_task_log_sources(
task_id: str,
task_manager: TaskManager = Depends(get_task_manager),
_ = Depends(has_permission("tasks", "READ"))
):
"""
Get unique sources for a task's logs.
"""
with belief_scope("get_task_log_sources"):
task = task_manager.get_task(task_id)
if not task:
raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Task not found")
return task_manager.get_task_log_sources(task_id)
# [/DEF:get_task_log_sources:Function]
@router.post("/{task_id}/resolve", response_model=Task)
# [DEF:resolve_task:Function]
# @PURPOSE: Resolve a task that is awaiting mapping.

View File

@@ -1,8 +1,11 @@
# [DEF:AppModule:Module]
# @TIER: CRITICAL
# @SEMANTICS: app, main, entrypoint, fastapi
# @PURPOSE: The main entry point for the FastAPI application. It initializes the app, configures CORS, sets up dependencies, includes API routers, and defines the WebSocket endpoint for log streaming.
# @LAYER: UI (API)
# @RELATION: Depends on the dependency module and API route modules.
# @INVARIANT: Only one FastAPI app instance exists per process.
# @INVARIANT: All WebSocket connections must be properly cleaned up on disconnect.
import sys
from pathlib import Path
@@ -123,26 +126,65 @@ app.include_router(llm.router)
app.include_router(storage.router, prefix="/api/storage", tags=["Storage"])
# [DEF:websocket_endpoint:Function]
# @PURPOSE: Provides a WebSocket endpoint for real-time log streaming of a task.
# @PURPOSE: Provides a WebSocket endpoint for real-time log streaming of a task with server-side filtering.
# @PRE: task_id must be a valid task ID.
# @POST: WebSocket connection is managed and logs are streamed until disconnect.
# @TIER: CRITICAL
# @UX_STATE: Connecting -> Streaming -> (Disconnected)
@app.websocket("/ws/logs/{task_id}")
async def websocket_endpoint(websocket: WebSocket, task_id: str):
async def websocket_endpoint(
websocket: WebSocket,
task_id: str,
source: str = None,
level: str = None
):
"""
WebSocket endpoint for real-time log streaming with optional server-side filtering.
Query Parameters:
source: Filter logs by source component (e.g., "plugin", "superset_api")
level: Filter logs by minimum level (DEBUG, INFO, WARNING, ERROR)
"""
with belief_scope("websocket_endpoint", f"task_id={task_id}"):
await websocket.accept()
logger.info(f"WebSocket connection accepted for task {task_id}")
# Normalize filter parameters
source_filter = source.lower() if source else None
level_filter = level.upper() if level else None
# Level hierarchy for filtering
level_hierarchy = {"DEBUG": 0, "INFO": 1, "WARNING": 2, "ERROR": 3}
min_level = level_hierarchy.get(level_filter, 0) if level_filter else 0
logger.info(f"WebSocket connection accepted for task {task_id} (source={source_filter}, level={level_filter})")
task_manager = get_task_manager()
queue = await task_manager.subscribe_logs(task_id)
def matches_filters(log_entry) -> bool:
"""Check if log entry matches the filter criteria."""
# Check source filter
if source_filter and log_entry.source.lower() != source_filter:
return False
# Check level filter
if level_filter:
log_level = level_hierarchy.get(log_entry.level.upper(), 0)
if log_level < min_level:
return False
return True
try:
# Stream new logs
logger.info(f"Starting log stream for task {task_id}")
# Send initial logs first to build context
# Send initial logs first to build context (apply filters)
initial_logs = task_manager.get_task_logs(task_id)
for log_entry in initial_logs:
log_dict = log_entry.dict()
log_dict['timestamp'] = log_dict['timestamp'].isoformat()
await websocket.send_json(log_dict)
if matches_filters(log_entry):
log_dict = log_entry.dict()
log_dict['timestamp'] = log_dict['timestamp'].isoformat()
await websocket.send_json(log_dict)
# Force a check for AWAITING_INPUT status immediately upon connection
# This ensures that if the task is already waiting when the user connects, they get the prompt.
@@ -160,6 +202,11 @@ async def websocket_endpoint(websocket: WebSocket, task_id: str):
while True:
log_entry = await queue.get()
# Apply server-side filtering
if not matches_filters(log_entry):
continue
log_dict = log_entry.dict()
log_dict['timestamp'] = log_dict['timestamp'].isoformat()
await websocket.send_json(log_dict)

View File

@@ -1,5 +1,6 @@
# [DEF:backend.src.core.auth.jwt:Module]
#
# @TIER: STANDARD
# @SEMANTICS: jwt, token, session, auth
# @PURPOSE: JWT token generation and validation logic.
# @LAYER: Core

View File

@@ -1,5 +1,6 @@
# [DEF:backend.src.core.auth.logger:Module]
#
# @TIER: STANDARD
# @SEMANTICS: auth, logger, audit, security
# @PURPOSE: Audit logging for security-related events.
# @LAYER: Core

View File

@@ -1,4 +1,5 @@
# [DEF:ConfigModels:Module]
# @TIER: STANDARD
# @SEMANTICS: config, models, pydantic
# @PURPOSE: Defines the data models for application configuration using Pydantic.
# @LAYER: Core
@@ -34,6 +35,7 @@ class Environment(BaseModel):
# @PURPOSE: Defines the configuration for the application's logging system.
class LoggingConfig(BaseModel):
level: str = "INFO"
task_log_level: str = "INFO" # Minimum level for task-specific logs (DEBUG, INFO, WARNING, ERROR)
file_path: Optional[str] = "logs/app.log"
max_bytes: int = 10 * 1024 * 1024
backup_count: int = 5

View File

@@ -19,6 +19,9 @@ _belief_state = threading.local()
# Global flag for belief state logging
_enable_belief_state = True
# Global task log level filter
_task_log_level = "INFO"
# [DEF:BeliefFormatter:Class]
# @PURPOSE: Custom logging formatter that adds belief state prefixes to log messages.
class BeliefFormatter(logging.Formatter):
@@ -58,12 +61,12 @@ class LogEntry(BaseModel):
# @SEMANTICS: logging, context, belief_state
@contextmanager
def belief_scope(anchor_id: str, message: str = ""):
# Log Entry if enabled
# Log Entry if enabled (DEBUG level to reduce noise)
if _enable_belief_state:
entry_msg = f"[{anchor_id}][Entry]"
if message:
entry_msg += f" {message}"
logger.info(entry_msg)
logger.debug(entry_msg)
# Set thread-local anchor_id
old_anchor = getattr(_belief_state, 'anchor_id', None)
@@ -71,13 +74,13 @@ def belief_scope(anchor_id: str, message: str = ""):
try:
yield
# Log Coherence OK and Exit
logger.info(f"[{anchor_id}][Coherence:OK]")
# Log Coherence OK and Exit (DEBUG level to reduce noise)
logger.debug(f"[{anchor_id}][Coherence:OK]")
if _enable_belief_state:
logger.info(f"[{anchor_id}][Exit]")
logger.debug(f"[{anchor_id}][Exit]")
except Exception as e:
# Log Coherence Failed
logger.info(f"[{anchor_id}][Coherence:Failed] {str(e)}")
# Log Coherence Failed (DEBUG level to reduce noise)
logger.debug(f"[{anchor_id}][Coherence:Failed] {str(e)}")
raise
finally:
# Restore old anchor
@@ -88,12 +91,13 @@ def belief_scope(anchor_id: str, message: str = ""):
# [DEF:configure_logger:Function]
# @PURPOSE: Configures the logger with the provided logging settings.
# @PRE: config is a valid LoggingConfig instance.
# @POST: Logger level, handlers, and belief state flag are updated.
# @POST: Logger level, handlers, belief state flag, and task log level are updated.
# @PARAM: config (LoggingConfig) - The logging configuration.
# @SEMANTICS: logging, configuration, initialization
def configure_logger(config):
global _enable_belief_state
global _enable_belief_state, _task_log_level
_enable_belief_state = config.enable_belief_state
_task_log_level = config.task_log_level.upper()
# Set logger level
level = getattr(logging, config.level.upper(), logging.INFO)
@@ -130,6 +134,36 @@ def configure_logger(config):
))
# [/DEF:configure_logger:Function]
# [DEF:get_task_log_level:Function]
# @PURPOSE: Returns the current task log level filter.
# @PRE: None.
# @POST: Returns the task log level string.
# @RETURN: str - The current task log level (DEBUG, INFO, WARNING, ERROR).
# @SEMANTICS: logging, configuration, getter
def get_task_log_level() -> str:
"""Returns the current task log level filter."""
return _task_log_level
# [/DEF:get_task_log_level:Function]
# [DEF:should_log_task_level:Function]
# @PURPOSE: Checks if a log level should be recorded based on task_log_level setting.
# @PRE: level is a valid log level string.
# @POST: Returns True if level meets or exceeds task_log_level threshold.
# @PARAM: level (str) - The log level to check.
# @RETURN: bool - True if the level should be logged.
# @SEMANTICS: logging, filter, level
def should_log_task_level(level: str) -> bool:
"""Checks if a log level should be recorded based on task_log_level setting."""
level_order = {"DEBUG": 0, "INFO": 1, "WARNING": 2, "ERROR": 3}
current_level = _task_log_level.upper()
check_level = level.upper()
current_order = level_order.get(current_level, 1) # Default to INFO
check_order = level_order.get(check_level, 1)
return check_order >= current_order
# [/DEF:should_log_task_level:Function]
# [DEF:WebSocketLogHandler:Class]
# @SEMANTICS: logging, handler, websocket, buffer
# @PURPOSE: A custom logging handler that captures log records into a buffer. It is designed to be extended for real-time log streaming over WebSockets.

View File

@@ -7,6 +7,7 @@ from jsonschema import validate
from .logger import belief_scope
# [DEF:PluginLoader:Class]
# @TIER: STANDARD
# @SEMANTICS: plugin, loader, dynamic, import
# @PURPOSE: Scans a specified directory for Python modules, dynamically loads them, and registers any classes that are valid implementations of the PluginBase interface.
# @LAYER: Core

View File

@@ -1,4 +1,5 @@
# [DEF:SchedulerModule:Module]
# @TIER: STANDARD
# @SEMANTICS: scheduler, apscheduler, cron, backup
# @PURPOSE: Manages scheduled tasks using APScheduler.
# @LAYER: Core
@@ -14,6 +15,7 @@ import asyncio
# [/SECTION]
# [DEF:SchedulerService:Class]
# @TIER: STANDARD
# @SEMANTICS: scheduler, service, apscheduler
# @PURPOSE: Provides a service to manage scheduled backup tasks.
class SchedulerService:

View File

@@ -1,4 +1,5 @@
# [DEF:TaskManagerPackage:Module]
# @TIER: TRIVIAL
# @SEMANTICS: task, manager, package, exports
# @PURPOSE: Exports the public API of the task manager package.
# @LAYER: Core

View File

@@ -1,47 +1,76 @@
# [DEF:TaskCleanupModule:Module]
# @SEMANTICS: task, cleanup, retention
# @PURPOSE: Implements task cleanup and retention policies.
# @TIER: STANDARD
# @SEMANTICS: task, cleanup, retention, logs
# @PURPOSE: Implements task cleanup and retention policies, including associated logs.
# @LAYER: Core
# @RELATION: Uses TaskPersistenceService to delete old tasks.
# @RELATION: Uses TaskPersistenceService and TaskLogPersistenceService to delete old tasks and logs.
from datetime import datetime, timedelta
from .persistence import TaskPersistenceService
from typing import List
from .persistence import TaskPersistenceService, TaskLogPersistenceService
from ..logger import logger, belief_scope
from ..config_manager import ConfigManager
# [DEF:TaskCleanupService:Class]
# @PURPOSE: Provides methods to clean up old task records.
# @PURPOSE: Provides methods to clean up old task records and their associated logs.
# @TIER: STANDARD
class TaskCleanupService:
# [DEF:__init__:Function]
# @PURPOSE: Initializes the cleanup service with dependencies.
# @PRE: persistence_service and config_manager are valid.
# @POST: Cleanup service is ready.
def __init__(self, persistence_service: TaskPersistenceService, config_manager: ConfigManager):
def __init__(
self,
persistence_service: TaskPersistenceService,
log_persistence_service: TaskLogPersistenceService,
config_manager: ConfigManager
):
self.persistence_service = persistence_service
self.log_persistence_service = log_persistence_service
self.config_manager = config_manager
# [/DEF:__init__:Function]
# [DEF:run_cleanup:Function]
# @PURPOSE: Deletes tasks older than the configured retention period.
# @PURPOSE: Deletes tasks older than the configured retention period and their logs.
# @PRE: Config manager has valid settings.
# @POST: Old tasks are deleted from persistence.
# @POST: Old tasks and their logs are deleted from persistence.
def run_cleanup(self):
with belief_scope("TaskCleanupService.run_cleanup"):
settings = self.config_manager.get_config().settings
retention_days = settings.task_retention_days
# This is a simplified implementation.
# In a real scenario, we would query IDs of tasks older than retention_days.
# For now, we'll log the action.
logger.info(f"Cleaning up tasks older than {retention_days} days.")
# Re-loading tasks to check for limit
# Load tasks to check for limit
tasks = self.persistence_service.load_tasks(limit=1000)
if len(tasks) > settings.task_retention_limit:
to_delete = [t.id for t in tasks[settings.task_retention_limit:]]
to_delete: List[str] = [t.id for t in tasks[settings.task_retention_limit:]]
# Delete logs first (before task records)
self.log_persistence_service.delete_logs_for_tasks(to_delete)
# Then delete task records
self.persistence_service.delete_tasks(to_delete)
logger.info(f"Deleted {len(to_delete)} tasks exceeding limit of {settings.task_retention_limit}")
logger.info(f"Deleted {len(to_delete)} tasks and their logs exceeding limit of {settings.task_retention_limit}")
# [/DEF:run_cleanup:Function]
# [DEF:delete_task_with_logs:Function]
# @PURPOSE: Delete a single task and all its associated logs.
# @PRE: task_id is a valid task ID.
# @POST: Task and all its logs are deleted.
# @PARAM: task_id (str) - The task ID to delete.
def delete_task_with_logs(self, task_id: str) -> None:
"""Delete a single task and all its associated logs."""
with belief_scope("TaskCleanupService.delete_task_with_logs", f"task_id={task_id}"):
# Delete logs first
self.log_persistence_service.delete_logs_for_task(task_id)
# Then delete task record
self.persistence_service.delete_tasks([task_id])
logger.info(f"Deleted task {task_id} and its associated logs")
# [/DEF:delete_task_with_logs:Function]
# [/DEF:TaskCleanupService:Class]
# [/DEF:TaskCleanupModule:Module]

View File

@@ -0,0 +1,115 @@
# [DEF:TaskContextModule:Module]
# @SEMANTICS: task, context, plugin, execution, logger
# @PURPOSE: Provides execution context passed to plugins during task execution.
# @LAYER: Core
# @RELATION: DEPENDS_ON -> TaskLogger, USED_BY -> plugins
# @TIER: CRITICAL
# @INVARIANT: Each TaskContext is bound to a single task execution.
# [SECTION: IMPORTS]
from typing import Dict, Any, Optional, Callable
from .task_logger import TaskLogger
# [/SECTION]
# [DEF:TaskContext:Class]
# @SEMANTICS: context, task, execution, plugin
# @PURPOSE: A container passed to plugin.execute() providing the logger and other task-specific utilities.
# @TIER: CRITICAL
# @INVARIANT: logger is always a valid TaskLogger instance.
# @UX_STATE: Idle -> Active -> Complete
class TaskContext:
"""
Execution context provided to plugins during task execution.
Usage:
def execute(params: dict, context: TaskContext = None):
if context:
context.logger.info("Starting process")
context.logger.progress("Processing items", percent=50)
# ... plugin logic
"""
# [DEF:__init__:Function]
# @PURPOSE: Initialize the TaskContext with task-specific resources.
# @PRE: task_id is a valid task identifier, add_log_fn is callable.
# @POST: TaskContext is ready to be passed to plugin.execute().
# @PARAM: task_id (str) - The ID of the task.
# @PARAM: add_log_fn (Callable) - Function to add log to TaskManager.
# @PARAM: params (Dict) - Task parameters.
# @PARAM: default_source (str) - Default source for logs (default: "plugin").
def __init__(
self,
task_id: str,
add_log_fn: Callable,
params: Dict[str, Any],
default_source: str = "plugin"
):
self._task_id = task_id
self._params = params
self._logger = TaskLogger(
task_id=task_id,
add_log_fn=add_log_fn,
source=default_source
)
# [/DEF:__init__:Function]
# [DEF:task_id:Function]
# @PURPOSE: Get the task ID.
# @PRE: TaskContext must be initialized.
# @POST: Returns the task ID string.
# @RETURN: str - The task ID.
@property
def task_id(self) -> str:
return self._task_id
# [/DEF:task_id:Function]
# [DEF:logger:Function]
# @PURPOSE: Get the TaskLogger instance for this context.
# @PRE: TaskContext must be initialized.
# @POST: Returns the TaskLogger instance.
# @RETURN: TaskLogger - The logger instance.
@property
def logger(self) -> TaskLogger:
return self._logger
# [/DEF:logger:Function]
# [DEF:params:Function]
# @PURPOSE: Get the task parameters.
# @PRE: TaskContext must be initialized.
# @POST: Returns the parameters dictionary.
# @RETURN: Dict[str, Any] - The task parameters.
@property
def params(self) -> Dict[str, Any]:
return self._params
# [/DEF:params:Function]
# [DEF:get_param:Function]
# @PURPOSE: Get a specific parameter value with optional default.
# @PRE: TaskContext must be initialized.
# @POST: Returns parameter value or default.
# @PARAM: key (str) - Parameter key.
# @PARAM: default (Any) - Default value if key not found.
# @RETURN: Any - Parameter value or default.
def get_param(self, key: str, default: Any = None) -> Any:
return self._params.get(key, default)
# [/DEF:get_param:Function]
# [DEF:create_sub_context:Function]
# @PURPOSE: Create a sub-context with a different default source.
# @PRE: source is a non-empty string.
# @POST: Returns new TaskContext with different logger source.
# @PARAM: source (str) - New default source for logging.
# @RETURN: TaskContext - New context with different source.
def create_sub_context(self, source: str) -> "TaskContext":
"""Create a sub-context with a different default source for logging."""
return TaskContext(
task_id=self._task_id,
add_log_fn=self._logger._add_log,
params=self._params,
default_source=source
)
# [/DEF:create_sub_context:Function]
# [/DEF:TaskContext:Class]
# [/DEF:TaskContextModule:Module]

View File

@@ -8,23 +8,33 @@
# [SECTION: IMPORTS]
import asyncio
import threading
import inspect
from concurrent.futures import ThreadPoolExecutor
from datetime import datetime
from typing import Dict, Any, List, Optional
from concurrent.futures import ThreadPoolExecutor
from .models import Task, TaskStatus, LogEntry
from .persistence import TaskPersistenceService
from ..logger import logger, belief_scope
from .models import Task, TaskStatus, LogEntry, LogFilter, LogStats, TaskLog
from .persistence import TaskPersistenceService, TaskLogPersistenceService
from .context import TaskContext
from ..logger import logger, belief_scope, should_log_task_level
# [/SECTION]
# [DEF:TaskManager:Class]
# @SEMANTICS: task, manager, lifecycle, execution, state
# @PURPOSE: Manages the lifecycle of tasks, including their creation, execution, and state tracking.
# @TIER: CRITICAL
# @INVARIANT: Task IDs are unique within the registry.
# @INVARIANT: Each task has exactly one status at any time.
# @INVARIANT: Log entries are never deleted after being added to a task.
class TaskManager:
"""
Manages the lifecycle of tasks, including their creation, execution, and state tracking.
"""
# Log flush interval in seconds
LOG_FLUSH_INTERVAL = 2.0
# [DEF:__init__:Function]
# @PURPOSE: Initialize the TaskManager with dependencies.
# @PRE: plugin_loader is initialized.
@@ -35,8 +45,18 @@ class TaskManager:
self.plugin_loader = plugin_loader
self.tasks: Dict[str, Task] = {}
self.subscribers: Dict[str, List[asyncio.Queue]] = {}
self.executor = ThreadPoolExecutor(max_workers=5) # For CPU-bound plugin execution
self.executor = ThreadPoolExecutor(max_workers=5) # For CPU-bound plugin execution
self.persistence_service = TaskPersistenceService()
self.log_persistence_service = TaskLogPersistenceService()
# Log buffer: task_id -> List[LogEntry]
self._log_buffer: Dict[str, List[LogEntry]] = {}
self._log_buffer_lock = threading.Lock()
# Flusher thread for batch writing logs
self._flusher_stop_event = threading.Event()
self._flusher_thread = threading.Thread(target=self._flusher_loop, daemon=True)
self._flusher_thread.start()
try:
self.loop = asyncio.get_running_loop()
@@ -47,6 +67,59 @@ class TaskManager:
# Load persisted tasks on startup
self.load_persisted_tasks()
# [/DEF:__init__:Function]
# [DEF:_flusher_loop:Function]
# @PURPOSE: Background thread that periodically flushes log buffer to database.
# @PRE: TaskManager is initialized.
# @POST: Logs are batch-written to database every LOG_FLUSH_INTERVAL seconds.
def _flusher_loop(self):
"""Background thread that flushes log buffer to database."""
while not self._flusher_stop_event.is_set():
self._flush_logs()
self._flusher_stop_event.wait(self.LOG_FLUSH_INTERVAL)
# [/DEF:_flusher_loop:Function]
# [DEF:_flush_logs:Function]
# @PURPOSE: Flush all buffered logs to the database.
# @PRE: None.
# @POST: All buffered logs are written to task_logs table.
def _flush_logs(self):
"""Flush all buffered logs to the database."""
with self._log_buffer_lock:
task_ids = list(self._log_buffer.keys())
for task_id in task_ids:
with self._log_buffer_lock:
logs = self._log_buffer.pop(task_id, [])
if logs:
try:
self.log_persistence_service.add_logs(task_id, logs)
except Exception as e:
logger.error(f"Failed to flush logs for task {task_id}: {e}")
# Re-add logs to buffer on failure
with self._log_buffer_lock:
if task_id not in self._log_buffer:
self._log_buffer[task_id] = []
self._log_buffer[task_id].extend(logs)
# [/DEF:_flush_logs:Function]
# [DEF:_flush_task_logs:Function]
# @PURPOSE: Flush logs for a specific task immediately.
# @PRE: task_id exists.
# @POST: Task's buffered logs are written to database.
# @PARAM: task_id (str) - The task ID.
def _flush_task_logs(self, task_id: str):
"""Flush logs for a specific task immediately."""
with self._log_buffer_lock:
logs = self._log_buffer.pop(task_id, [])
if logs:
try:
self.log_persistence_service.add_logs(task_id, logs)
except Exception as e:
logger.error(f"Failed to flush logs for task {task_id}: {e}")
# [/DEF:_flush_task_logs:Function]
# [DEF:create_task:Function]
# @PURPOSE: Creates and queues a new task for execution.
@@ -78,7 +151,7 @@ class TaskManager:
# [/DEF:create_task:Function]
# [DEF:_run_task:Function]
# @PURPOSE: Internal method to execute a task.
# @PURPOSE: Internal method to execute a task with TaskContext support.
# @PRE: Task exists in registry.
# @POST: Task is executed, status updated to SUCCESS or FAILED.
# @PARAM: task_id (str) - The ID of the task to run.
@@ -91,30 +164,54 @@ class TaskManager:
task.status = TaskStatus.RUNNING
task.started_at = datetime.utcnow()
self.persistence_service.persist_task(task)
self._add_log(task_id, "INFO", f"Task started for plugin '{plugin.name}'")
self._add_log(task_id, "INFO", f"Task started for plugin '{plugin.name}'", source="system")
try:
# Execute plugin
# Prepare params and check if plugin supports new TaskContext
params = {**task.params, "_task_id": task_id}
if asyncio.iscoroutinefunction(plugin.execute):
task.result = await plugin.execute(params)
else:
task.result = await self.loop.run_in_executor(
self.executor,
plugin.execute,
params
# Check if plugin's execute method accepts 'context' parameter
sig = inspect.signature(plugin.execute)
accepts_context = 'context' in sig.parameters
if accepts_context:
# Create TaskContext for new-style plugins
context = TaskContext(
task_id=task_id,
add_log_fn=self._add_log,
params=params,
default_source="plugin"
)
if asyncio.iscoroutinefunction(plugin.execute):
task.result = await plugin.execute(params, context=context)
else:
task.result = await self.loop.run_in_executor(
self.executor,
lambda: plugin.execute(params, context=context)
)
else:
# Backward compatibility: old-style plugins without context
if asyncio.iscoroutinefunction(plugin.execute):
task.result = await plugin.execute(params)
else:
task.result = await self.loop.run_in_executor(
self.executor,
plugin.execute,
params
)
logger.info(f"Task {task_id} completed successfully")
task.status = TaskStatus.SUCCESS
self._add_log(task_id, "INFO", f"Task completed successfully for plugin '{plugin.name}'")
self._add_log(task_id, "INFO", f"Task completed successfully for plugin '{plugin.name}'", source="system")
except Exception as e:
logger.error(f"Task {task_id} failed: {e}")
task.status = TaskStatus.FAILED
self._add_log(task_id, "ERROR", f"Task failed: {e}", {"error_type": type(e).__name__})
self._add_log(task_id, "ERROR", f"Task failed: {e}", source="system", metadata={"error_type": type(e).__name__})
finally:
task.finished_at = datetime.utcnow()
# Flush any remaining buffered logs before persisting task
self._flush_task_logs(task_id)
self.persistence_service.persist_task(task)
logger.info(f"Task {task_id} execution finished with status: {task.status}")
# [/DEF:_run_task:Function]
@@ -224,36 +321,106 @@ class TaskManager:
# [/DEF:get_tasks:Function]
# [DEF:get_task_logs:Function]
# @PURPOSE: Retrieves logs for a specific task.
# @PURPOSE: Retrieves logs for a specific task (from memory for running, persistence for completed).
# @PRE: task_id is a string.
# @POST: Returns list of LogEntry objects.
# @POST: Returns list of LogEntry or TaskLog objects.
# @PARAM: task_id (str) - ID of the task.
# @PARAM: log_filter (Optional[LogFilter]) - Filter parameters.
# @RETURN: List[LogEntry] - List of log entries.
def get_task_logs(self, task_id: str) -> List[LogEntry]:
def get_task_logs(self, task_id: str, log_filter: Optional[LogFilter] = None) -> List[LogEntry]:
with belief_scope("TaskManager.get_task_logs", f"task_id={task_id}"):
task = self.tasks.get(task_id)
# For completed tasks, fetch from persistence
if task and task.status in [TaskStatus.SUCCESS, TaskStatus.FAILED]:
if log_filter is None:
log_filter = LogFilter()
task_logs = self.log_persistence_service.get_logs(task_id, log_filter)
# Convert TaskLog to LogEntry for backward compatibility
return [
LogEntry(
timestamp=log.timestamp,
level=log.level,
message=log.message,
source=log.source,
metadata=log.metadata
)
for log in task_logs
]
# For running/pending tasks, return from memory
return task.logs if task else []
# [/DEF:get_task_logs:Function]
# [DEF:get_task_log_stats:Function]
# @PURPOSE: Get statistics about logs for a task.
# @PRE: task_id is a valid task ID.
# @POST: Returns LogStats with counts by level and source.
# @PARAM: task_id (str) - The task ID.
# @RETURN: LogStats - Statistics about task logs.
def get_task_log_stats(self, task_id: str) -> LogStats:
with belief_scope("TaskManager.get_task_log_stats", f"task_id={task_id}"):
return self.log_persistence_service.get_log_stats(task_id)
# [/DEF:get_task_log_stats:Function]
# [DEF:get_task_log_sources:Function]
# @PURPOSE: Get unique sources for a task's logs.
# @PRE: task_id is a valid task ID.
# @POST: Returns list of unique source strings.
# @PARAM: task_id (str) - The task ID.
# @RETURN: List[str] - Unique source names.
def get_task_log_sources(self, task_id: str) -> List[str]:
with belief_scope("TaskManager.get_task_log_sources", f"task_id={task_id}"):
return self.log_persistence_service.get_sources(task_id)
# [/DEF:get_task_log_sources:Function]
# [DEF:_add_log:Function]
# @PURPOSE: Adds a log entry to a task and notifies subscribers.
# @PURPOSE: Adds a log entry to a task buffer and notifies subscribers.
# @PRE: Task exists.
# @POST: Log added to task and pushed to queues.
# @POST: Log added to buffer and pushed to queues (if level meets task_log_level filter).
# @PARAM: task_id (str) - ID of the task.
# @PARAM: level (str) - Log level.
# @PARAM: message (str) - Log message.
# @PARAM: context (Optional[Dict]) - Log context.
def _add_log(self, task_id: str, level: str, message: str, context: Optional[Dict[str, Any]] = None):
# @PARAM: source (str) - Source component (default: "system").
# @PARAM: metadata (Optional[Dict]) - Additional structured data.
# @PARAM: context (Optional[Dict]) - Legacy context (for backward compatibility).
def _add_log(
self,
task_id: str,
level: str,
message: str,
source: str = "system",
metadata: Optional[Dict[str, Any]] = None,
context: Optional[Dict[str, Any]] = None
):
with belief_scope("TaskManager._add_log", f"task_id={task_id}"):
task = self.tasks.get(task_id)
if not task:
return
log_entry = LogEntry(level=level, message=message, context=context)
task.logs.append(log_entry)
self.persistence_service.persist_task(task)
# Filter logs based on task_log_level configuration
if not should_log_task_level(level):
return
# Notify subscribers
# Create log entry with new fields
log_entry = LogEntry(
level=level,
message=message,
source=source,
metadata=metadata,
context=context # Keep for backward compatibility
)
# Add to in-memory logs (for backward compatibility with legacy JSON field)
task.logs.append(log_entry)
# Add to buffer for batch persistence
with self._log_buffer_lock:
if task_id not in self._log_buffer:
self._log_buffer[task_id] = []
self._log_buffer[task_id].append(log_entry)
# Notify subscribers (for real-time WebSocket updates)
if task_id in self.subscribers:
for queue in self.subscribers[task_id]:
self.loop.call_soon_threadsafe(queue.put_nowait, log_entry)
@@ -353,7 +520,7 @@ class TaskManager:
# [/DEF:resume_task_with_password:Function]
# [DEF:clear_tasks:Function]
# @PURPOSE: Clears tasks based on status filter.
# @PURPOSE: Clears tasks based on status filter (also deletes associated logs).
# @PRE: status is Optional[TaskStatus].
# @POST: Tasks matching filter (or all non-active) cleared from registry and database.
# @PARAM: status (Optional[TaskStatus]) - Filter by task status.
@@ -387,9 +554,13 @@ class TaskManager:
del self.tasks[tid]
# Remove from persistence
# Remove from persistence (task_records and task_logs via CASCADE)
self.persistence_service.delete_tasks(tasks_to_remove)
# Also explicitly delete logs (in case CASCADE is not set up)
if tasks_to_remove:
self.log_persistence_service.delete_logs_for_tasks(tasks_to_remove)
logger.info(f"Cleared {len(tasks_to_remove)} tasks.")
return len(tasks_to_remove)
# [/DEF:clear_tasks:Function]

View File

@@ -1,4 +1,5 @@
# [DEF:TaskManagerModels:Module]
# @TIER: STANDARD
# @SEMANTICS: task, models, pydantic, enum, state
# @PURPOSE: Defines the data models and enumerations used by the Task Manager.
# @LAYER: Core
@@ -16,6 +17,7 @@ from pydantic import BaseModel, Field
# [/SECTION]
# [DEF:TaskStatus:Enum]
# @TIER: TRIVIAL
# @SEMANTICS: task, status, state, enum
# @PURPOSE: Defines the possible states a task can be in during its lifecycle.
class TaskStatus(str, Enum):
@@ -27,17 +29,73 @@ class TaskStatus(str, Enum):
AWAITING_INPUT = "AWAITING_INPUT"
# [/DEF:TaskStatus:Enum]
# [DEF:LogLevel:Enum]
# @SEMANTICS: log, level, severity, enum
# @PURPOSE: Defines the possible log levels for task logging.
# @TIER: STANDARD
class LogLevel(str, Enum):
DEBUG = "DEBUG"
INFO = "INFO"
WARNING = "WARNING"
ERROR = "ERROR"
# [/DEF:LogLevel:Enum]
# [DEF:LogEntry:Class]
# @SEMANTICS: log, entry, record, pydantic
# @PURPOSE: A Pydantic model representing a single, structured log entry associated with a task.
# @TIER: CRITICAL
# @INVARIANT: Each log entry has a unique timestamp and source.
class LogEntry(BaseModel):
timestamp: datetime = Field(default_factory=datetime.utcnow)
level: str
level: str = Field(default="INFO")
message: str
context: Optional[Dict[str, Any]] = None
source: str = Field(default="system") # Component attribution: plugin, superset_api, git, etc.
context: Optional[Dict[str, Any]] = None # Legacy field, kept for backward compatibility
metadata: Optional[Dict[str, Any]] = None # Structured metadata (e.g., dashboard_id, progress)
# [/DEF:LogEntry:Class]
# [DEF:TaskLog:Class]
# @SEMANTICS: task, log, persistent, pydantic
# @PURPOSE: A Pydantic model representing a persisted log entry from the database.
# @TIER: STANDARD
# @RELATION: MAPS_TO -> TaskLogRecord
class TaskLog(BaseModel):
id: int
task_id: str
timestamp: datetime
level: str
source: str
message: str
metadata: Optional[Dict[str, Any]] = None
class Config:
from_attributes = True
# [/DEF:TaskLog:Class]
# [DEF:LogFilter:Class]
# @SEMANTICS: log, filter, query, pydantic
# @PURPOSE: Filter parameters for querying task logs.
# @TIER: STANDARD
class LogFilter(BaseModel):
level: Optional[str] = None # Filter by log level
source: Optional[str] = None # Filter by source component
search: Optional[str] = None # Text search in message
offset: int = Field(default=0, ge=0)
limit: int = Field(default=100, ge=1, le=1000)
# [/DEF:LogFilter:Class]
# [DEF:LogStats:Class]
# @SEMANTICS: log, stats, aggregation, pydantic
# @PURPOSE: Statistics about log entries for a task.
# @TIER: STANDARD
class LogStats(BaseModel):
total_count: int
by_level: Dict[str, int] # {"INFO": 10, "ERROR": 2}
by_source: Dict[str, int] # {"plugin": 5, "superset_api": 7}
# [/DEF:LogStats:Class]
# [DEF:Task:Class]
# @TIER: STANDARD
# @SEMANTICS: task, job, execution, state, pydantic
# @PURPOSE: A Pydantic model representing a single execution instance of a plugin, including its status, parameters, and logs.
class Task(BaseModel):

View File

@@ -11,9 +11,10 @@ from typing import List, Optional, Dict, Any
import json
from sqlalchemy.orm import Session
from ...models.task import TaskRecord
from sqlalchemy import and_, or_
from ...models.task import TaskRecord, TaskLogRecord
from ..database import TasksSessionLocal
from .models import Task, TaskStatus, LogEntry
from .models import Task, TaskStatus, LogEntry, TaskLog, LogFilter, LogStats
from ..logger import logger, belief_scope
# [/SECTION]
@@ -170,4 +171,215 @@ class TaskPersistenceService:
# [/DEF:delete_tasks:Function]
# [/DEF:TaskPersistenceService:Class]
# [DEF:TaskLogPersistenceService:Class]
# @SEMANTICS: persistence, service, database, log, sqlalchemy
# @PURPOSE: Provides methods to save and query task logs from the task_logs table.
# @TIER: CRITICAL
# @RELATION: DEPENDS_ON -> TaskLogRecord
# @INVARIANT: Log entries are batch-inserted for performance.
class TaskLogPersistenceService:
"""
Service for persisting and querying task logs.
Supports batch inserts, filtering, and statistics.
"""
# [DEF:__init__:Function]
# @PURPOSE: Initialize the log persistence service.
# @POST: Service is ready.
def __init__(self):
pass
# [/DEF:__init__:Function]
# [DEF:add_logs:Function]
# @PURPOSE: Batch insert log entries for a task.
# @PRE: logs is a list of LogEntry objects.
# @POST: All logs inserted into task_logs table.
# @PARAM: task_id (str) - The task ID.
# @PARAM: logs (List[LogEntry]) - Log entries to insert.
# @SIDE_EFFECT: Writes to task_logs table.
def add_logs(self, task_id: str, logs: List[LogEntry]) -> None:
if not logs:
return
with belief_scope("TaskLogPersistenceService.add_logs", f"task_id={task_id}"):
session: Session = TasksSessionLocal()
try:
for log in logs:
record = TaskLogRecord(
task_id=task_id,
timestamp=log.timestamp,
level=log.level,
source=log.source or "system",
message=log.message,
metadata_json=json.dumps(log.metadata) if log.metadata else None
)
session.add(record)
session.commit()
except Exception as e:
session.rollback()
logger.error(f"Failed to add logs for task {task_id}: {e}")
finally:
session.close()
# [/DEF:add_logs:Function]
# [DEF:get_logs:Function]
# @PURPOSE: Query logs for a task with filtering and pagination.
# @PRE: task_id is a valid task ID.
# @POST: Returns list of TaskLog objects matching filters.
# @PARAM: task_id (str) - The task ID.
# @PARAM: log_filter (LogFilter) - Filter parameters.
# @RETURN: List[TaskLog] - Filtered log entries.
def get_logs(self, task_id: str, log_filter: LogFilter) -> List[TaskLog]:
with belief_scope("TaskLogPersistenceService.get_logs", f"task_id={task_id}"):
session: Session = TasksSessionLocal()
try:
query = session.query(TaskLogRecord).filter(TaskLogRecord.task_id == task_id)
# Apply filters
if log_filter.level:
query = query.filter(TaskLogRecord.level == log_filter.level.upper())
if log_filter.source:
query = query.filter(TaskLogRecord.source == log_filter.source)
if log_filter.search:
search_pattern = f"%{log_filter.search}%"
query = query.filter(TaskLogRecord.message.ilike(search_pattern))
# Order by timestamp ascending (oldest first)
query = query.order_by(TaskLogRecord.timestamp.asc())
# Apply pagination
records = query.offset(log_filter.offset).limit(log_filter.limit).all()
logs = []
for record in records:
metadata = None
if record.metadata_json:
try:
metadata = json.loads(record.metadata_json)
except json.JSONDecodeError:
metadata = None
logs.append(TaskLog(
id=record.id,
task_id=record.task_id,
timestamp=record.timestamp,
level=record.level,
source=record.source,
message=record.message,
metadata=metadata
))
return logs
finally:
session.close()
# [/DEF:get_logs:Function]
# [DEF:get_log_stats:Function]
# @PURPOSE: Get statistics about logs for a task.
# @PRE: task_id is a valid task ID.
# @POST: Returns LogStats with counts by level and source.
# @PARAM: task_id (str) - The task ID.
# @RETURN: LogStats - Statistics about task logs.
def get_log_stats(self, task_id: str) -> LogStats:
with belief_scope("TaskLogPersistenceService.get_log_stats", f"task_id={task_id}"):
session: Session = TasksSessionLocal()
try:
# Get total count
total_count = session.query(TaskLogRecord).filter(
TaskLogRecord.task_id == task_id
).count()
# Get counts by level
from sqlalchemy import func
level_counts = session.query(
TaskLogRecord.level,
func.count(TaskLogRecord.id)
).filter(
TaskLogRecord.task_id == task_id
).group_by(TaskLogRecord.level).all()
by_level = {level: count for level, count in level_counts}
# Get counts by source
source_counts = session.query(
TaskLogRecord.source,
func.count(TaskLogRecord.id)
).filter(
TaskLogRecord.task_id == task_id
).group_by(TaskLogRecord.source).all()
by_source = {source: count for source, count in source_counts}
return LogStats(
total_count=total_count,
by_level=by_level,
by_source=by_source
)
finally:
session.close()
# [/DEF:get_log_stats:Function]
# [DEF:get_sources:Function]
# @PURPOSE: Get unique sources for a task's logs.
# @PRE: task_id is a valid task ID.
# @POST: Returns list of unique source strings.
# @PARAM: task_id (str) - The task ID.
# @RETURN: List[str] - Unique source names.
def get_sources(self, task_id: str) -> List[str]:
with belief_scope("TaskLogPersistenceService.get_sources", f"task_id={task_id}"):
session: Session = TasksSessionLocal()
try:
from sqlalchemy import distinct
sources = session.query(distinct(TaskLogRecord.source)).filter(
TaskLogRecord.task_id == task_id
).all()
return [s[0] for s in sources]
finally:
session.close()
# [/DEF:get_sources:Function]
# [DEF:delete_logs_for_task:Function]
# @PURPOSE: Delete all logs for a specific task.
# @PRE: task_id is a valid task ID.
# @POST: All logs for the task are deleted.
# @PARAM: task_id (str) - The task ID.
# @SIDE_EFFECT: Deletes from task_logs table.
def delete_logs_for_task(self, task_id: str) -> None:
with belief_scope("TaskLogPersistenceService.delete_logs_for_task", f"task_id={task_id}"):
session: Session = TasksSessionLocal()
try:
session.query(TaskLogRecord).filter(
TaskLogRecord.task_id == task_id
).delete(synchronize_session=False)
session.commit()
except Exception as e:
session.rollback()
logger.error(f"Failed to delete logs for task {task_id}: {e}")
finally:
session.close()
# [/DEF:delete_logs_for_task:Function]
# [DEF:delete_logs_for_tasks:Function]
# @PURPOSE: Delete all logs for multiple tasks.
# @PRE: task_ids is a list of task IDs.
# @POST: All logs for the tasks are deleted.
# @PARAM: task_ids (List[str]) - List of task IDs.
def delete_logs_for_tasks(self, task_ids: List[str]) -> None:
if not task_ids:
return
with belief_scope("TaskLogPersistenceService.delete_logs_for_tasks"):
session: Session = TasksSessionLocal()
try:
session.query(TaskLogRecord).filter(
TaskLogRecord.task_id.in_(task_ids)
).delete(synchronize_session=False)
session.commit()
except Exception as e:
session.rollback()
logger.error(f"Failed to delete logs for tasks: {e}")
finally:
session.close()
# [/DEF:delete_logs_for_tasks:Function]
# [/DEF:TaskLogPersistenceService:Class]
# [/DEF:TaskPersistenceModule:Module]

View File

@@ -0,0 +1,168 @@
# [DEF:TaskLoggerModule:Module]
# @SEMANTICS: task, logger, context, plugin, attribution
# @PURPOSE: Provides a dedicated logger for tasks with automatic source attribution.
# @LAYER: Core
# @RELATION: DEPENDS_ON -> TaskManager, CALLS -> TaskManager._add_log
# @TIER: CRITICAL
# @INVARIANT: Each TaskLogger instance is bound to a specific task_id and default source.
# [SECTION: IMPORTS]
from typing import Dict, Any, Optional, Callable
from datetime import datetime
# [/SECTION]
# [DEF:TaskLogger:Class]
# @SEMANTICS: logger, task, source, attribution
# @PURPOSE: A wrapper around TaskManager._add_log that carries task_id and source context.
# @TIER: CRITICAL
# @INVARIANT: All log calls include the task_id and source.
# @UX_STATE: Idle -> Logging -> (system records log)
class TaskLogger:
"""
A dedicated logger for tasks that automatically tags logs with source attribution.
Usage:
logger = TaskLogger(task_id="abc123", add_log_fn=task_manager._add_log, source="plugin")
logger.info("Starting backup process")
logger.error("Failed to connect", metadata={"error_code": 500})
# Create sub-logger with different source
api_logger = logger.with_source("superset_api")
api_logger.info("Fetching dashboards")
"""
# [DEF:__init__:Function]
# @PURPOSE: Initialize the TaskLogger with task context.
# @PRE: add_log_fn is a callable that accepts (task_id, level, message, context, source, metadata).
# @POST: TaskLogger is ready to log messages.
# @PARAM: task_id (str) - The ID of the task.
# @PARAM: add_log_fn (Callable) - Function to add log to TaskManager.
# @PARAM: source (str) - Default source for logs (default: "plugin").
def __init__(
self,
task_id: str,
add_log_fn: Callable,
source: str = "plugin"
):
self._task_id = task_id
self._add_log = add_log_fn
self._default_source = source
# [/DEF:__init__:Function]
# [DEF:with_source:Function]
# @PURPOSE: Create a sub-logger with a different default source.
# @PRE: source is a non-empty string.
# @POST: Returns new TaskLogger with the same task_id but different source.
# @PARAM: source (str) - New default source.
# @RETURN: TaskLogger - New logger instance.
def with_source(self, source: str) -> "TaskLogger":
"""Create a sub-logger with a different source context."""
return TaskLogger(
task_id=self._task_id,
add_log_fn=self._add_log,
source=source
)
# [/DEF:with_source:Function]
# [DEF:_log:Function]
# @PURPOSE: Internal method to log a message at a given level.
# @PRE: level is a valid log level string.
# @POST: Log entry added via add_log_fn.
# @PARAM: level (str) - Log level (DEBUG, INFO, WARNING, ERROR).
# @PARAM: message (str) - Log message.
# @PARAM: source (Optional[str]) - Override source for this log entry.
# @PARAM: metadata (Optional[Dict]) - Additional structured data.
def _log(
self,
level: str,
message: str,
source: Optional[str] = None,
metadata: Optional[Dict[str, Any]] = None
) -> None:
"""Internal logging method."""
self._add_log(
task_id=self._task_id,
level=level,
message=message,
source=source or self._default_source,
metadata=metadata
)
# [/DEF:_log:Function]
# [DEF:debug:Function]
# @PURPOSE: Log a DEBUG level message.
# @PARAM: message (str) - Log message.
# @PARAM: source (Optional[str]) - Override source.
# @PARAM: metadata (Optional[Dict]) - Additional data.
def debug(
self,
message: str,
source: Optional[str] = None,
metadata: Optional[Dict[str, Any]] = None
) -> None:
self._log("DEBUG", message, source, metadata)
# [/DEF:debug:Function]
# [DEF:info:Function]
# @PURPOSE: Log an INFO level message.
# @PARAM: message (str) - Log message.
# @PARAM: source (Optional[str]) - Override source.
# @PARAM: metadata (Optional[Dict]) - Additional data.
def info(
self,
message: str,
source: Optional[str] = None,
metadata: Optional[Dict[str, Any]] = None
) -> None:
self._log("INFO", message, source, metadata)
# [/DEF:info:Function]
# [DEF:warning:Function]
# @PURPOSE: Log a WARNING level message.
# @PARAM: message (str) - Log message.
# @PARAM: source (Optional[str]) - Override source.
# @PARAM: metadata (Optional[Dict]) - Additional data.
def warning(
self,
message: str,
source: Optional[str] = None,
metadata: Optional[Dict[str, Any]] = None
) -> None:
self._log("WARNING", message, source, metadata)
# [/DEF:warning:Function]
# [DEF:error:Function]
# @PURPOSE: Log an ERROR level message.
# @PARAM: message (str) - Log message.
# @PARAM: source (Optional[str]) - Override source.
# @PARAM: metadata (Optional[Dict]) - Additional data.
def error(
self,
message: str,
source: Optional[str] = None,
metadata: Optional[Dict[str, Any]] = None
) -> None:
self._log("ERROR", message, source, metadata)
# [/DEF:error:Function]
# [DEF:progress:Function]
# @PURPOSE: Log a progress update with percentage.
# @PRE: percent is between 0 and 100.
# @POST: Log entry with progress metadata added.
# @PARAM: message (str) - Progress message.
# @PARAM: percent (float) - Progress percentage (0-100).
# @PARAM: source (Optional[str]) - Override source.
def progress(
self,
message: str,
percent: float,
source: Optional[str] = None
) -> None:
"""Log a progress update with percentage."""
metadata = {"progress": min(100, max(0, percent))}
self._log("INFO", message, source, metadata)
# [/DEF:progress:Function]
# [/DEF:TaskLogger:Class]
# [/DEF:TaskLoggerModule:Module]

View File

@@ -1,5 +1,6 @@
# [DEF:backend.src.models.connection:Module]
#
# @TIER: TRIVIAL
# @SEMANTICS: database, connection, configuration, sqlalchemy, sqlite
# @PURPOSE: Defines the database schema for external database connection configurations.
# @LAYER: Domain
@@ -15,6 +16,7 @@ import uuid
# [/SECTION]
# [DEF:ConnectionConfig:Class]
# @TIER: TRIVIAL
# @PURPOSE: Stores credentials for external databases used for column mapping.
class ConnectionConfig(Base):
__tablename__ = "connection_configs"

View File

@@ -1,4 +1,5 @@
# [DEF:GitModels:Module]
# @TIER: TRIVIAL
# @SEMANTICS: git, models, sqlalchemy, database, schema
# @PURPOSE: Git-specific SQLAlchemy models for configuration and repository tracking.
# @LAYER: Model
@@ -26,11 +27,10 @@ class SyncStatus(str, enum.Enum):
DIRTY = "DIRTY"
CONFLICT = "CONFLICT"
# [DEF:GitServerConfig:Class]
# @TIER: TRIVIAL
# @PURPOSE: Configuration for a Git server connection.
class GitServerConfig(Base):
"""
[DEF:GitServerConfig:Class]
Configuration for a Git server connection.
"""
__tablename__ = "git_server_configs"
id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()))
@@ -41,12 +41,12 @@ class GitServerConfig(Base):
default_repository = Column(String(255), nullable=True)
status = Column(Enum(GitStatus), default=GitStatus.UNKNOWN)
last_validated = Column(DateTime, default=datetime.utcnow)
# [/DEF:GitServerConfig:Class]
# [DEF:GitRepository:Class]
# @TIER: TRIVIAL
# @PURPOSE: Tracking for a local Git repository linked to a dashboard.
class GitRepository(Base):
"""
[DEF:GitRepository:Class]
Tracking for a local Git repository linked to a dashboard.
"""
__tablename__ = "git_repositories"
id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()))
@@ -56,12 +56,12 @@ class GitRepository(Base):
local_path = Column(String(255), nullable=False)
current_branch = Column(String(255), default="main")
sync_status = Column(Enum(SyncStatus), default=SyncStatus.CLEAN)
# [/DEF:GitRepository:Class]
# [DEF:DeploymentEnvironment:Class]
# @TIER: TRIVIAL
# @PURPOSE: Target Superset environments for dashboard deployment.
class DeploymentEnvironment(Base):
"""
[DEF:DeploymentEnvironment:Class]
Target Superset environments for dashboard deployment.
"""
__tablename__ = "deployment_environments"
id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()))
@@ -69,5 +69,6 @@ class DeploymentEnvironment(Base):
superset_url = Column(String(255), nullable=False)
superset_token = Column(String(255), nullable=False)
is_active = Column(Boolean, default=True)
# [/DEF:DeploymentEnvironment:Class]
# [/DEF:GitModels:Module]
# [/DEF:GitModels:Module]

View File

@@ -59,6 +59,7 @@ class DatabaseMapping(Base):
# [/DEF:DatabaseMapping:Class]
# [DEF:MigrationJob:Class]
# @TIER: TRIVIAL
# @PURPOSE: Represents a single migration execution job.
class MigrationJob(Base):
__tablename__ = "migration_jobs"

View File

@@ -1,9 +1,16 @@
# [DEF:backend.src.models.storage:Module]
# @TIER: TRIVIAL
# @SEMANTICS: storage, file, model, pydantic
# @PURPOSE: Data models for the storage system.
# @LAYER: Domain
from datetime import datetime
from enum import Enum
from typing import Optional
from pydantic import BaseModel, Field
# [DEF:FileCategory:Class]
# @TIER: TRIVIAL
# @PURPOSE: Enumeration of supported file categories in the storage system.
class FileCategory(str, Enum):
BACKUP = "backups"
@@ -11,6 +18,7 @@ class FileCategory(str, Enum):
# [/DEF:FileCategory:Class]
# [DEF:StorageConfig:Class]
# @TIER: TRIVIAL
# @PURPOSE: Configuration model for the storage system, defining paths and naming patterns.
class StorageConfig(BaseModel):
root_path: str = Field(default="backups", description="Absolute path to the storage root directory.")
@@ -20,6 +28,7 @@ class StorageConfig(BaseModel):
# [/DEF:StorageConfig:Class]
# [DEF:StoredFile:Class]
# @TIER: TRIVIAL
# @PURPOSE: Data model representing metadata for a file stored in the system.
class StoredFile(BaseModel):
name: str = Field(..., description="Name of the file (including extension).")
@@ -28,4 +37,6 @@ class StoredFile(BaseModel):
created_at: datetime = Field(..., description="Creation timestamp.")
category: FileCategory = Field(..., description="Category of the file.")
mime_type: Optional[str] = Field(None, description="MIME type of the file.")
# [/DEF:StoredFile:Class]
# [/DEF:StoredFile:Class]
# [/DEF:backend.src.models.storage:Module]

View File

@@ -1,5 +1,6 @@
# [DEF:backend.src.models.task:Module]
#
# @TIER: TRIVIAL
# @SEMANTICS: database, task, record, sqlalchemy, sqlite
# @PURPOSE: Defines the database schema for task execution records.
# @LAYER: Domain
@@ -8,13 +9,14 @@
# @INVARIANT: All primary keys are UUID strings.
# [SECTION: IMPORTS]
from sqlalchemy import Column, String, DateTime, JSON, ForeignKey
from sqlalchemy import Column, String, DateTime, JSON, ForeignKey, Text, Integer, Index
from sqlalchemy.sql import func
from .mapping import Base
import uuid
# [/SECTION]
# [DEF:TaskRecord:Class]
# @TIER: TRIVIAL
# @PURPOSE: Represents a persistent record of a task execution.
class TaskRecord(Base):
__tablename__ = "task_records"
@@ -25,11 +27,35 @@ class TaskRecord(Base):
environment_id = Column(String, ForeignKey("environments.id"), nullable=True)
started_at = Column(DateTime(timezone=True), nullable=True)
finished_at = Column(DateTime(timezone=True), nullable=True)
logs = Column(JSON, nullable=True) # Store structured logs as JSON
logs = Column(JSON, nullable=True) # Store structured logs as JSON (legacy, kept for backward compatibility)
error = Column(String, nullable=True)
result = Column(JSON, nullable=True)
created_at = Column(DateTime(timezone=True), server_default=func.now())
params = Column(JSON, nullable=True)
# [/DEF:TaskRecord:Class]
# [DEF:TaskLogRecord:Class]
# @PURPOSE: Represents a single persistent log entry for a task.
# @TIER: CRITICAL
# @RELATION: DEPENDS_ON -> TaskRecord
# @INVARIANT: Each log entry belongs to exactly one task.
class TaskLogRecord(Base):
__tablename__ = "task_logs"
id = Column(Integer, primary_key=True, autoincrement=True)
task_id = Column(String, ForeignKey("task_records.id", ondelete="CASCADE"), nullable=False, index=True)
timestamp = Column(DateTime(timezone=True), nullable=False, index=True)
level = Column(String(16), nullable=False) # INFO, WARNING, ERROR, DEBUG
source = Column(String(64), nullable=False, default="system") # plugin, superset_api, git, etc.
message = Column(Text, nullable=False)
metadata_json = Column(Text, nullable=True) # JSON string for additional metadata
# Composite indexes for efficient filtering
__table_args__ = (
Index('ix_task_logs_task_timestamp', 'task_id', 'timestamp'),
Index('ix_task_logs_task_level', 'task_id', 'level'),
Index('ix_task_logs_task_source', 'task_id', 'source'),
)
# [/DEF:TaskLogRecord:Class]
# [/DEF:backend.src.models.task:Module]

View File

@@ -5,13 +5,14 @@
# @RELATION: IMPLEMENTS -> PluginBase
# @RELATION: DEPENDS_ON -> superset_tool.client
# @RELATION: DEPENDS_ON -> superset_tool.utils
# @RELATION: USES -> TaskContext
from typing import Dict, Any
from typing import Dict, Any, Optional
from pathlib import Path
from requests.exceptions import RequestException
from ..core.plugin_base import PluginBase
from ..core.logger import belief_scope
from ..core.logger import belief_scope, logger as app_logger
from ..core.superset_client import SupersetClient
from ..core.utils.network import SupersetAPIError
from ..core.utils.fileio import (
@@ -23,6 +24,7 @@ from ..core.utils.fileio import (
RetentionPolicy
)
from ..dependencies import get_config_manager
from ..core.task_manager.context import TaskContext
# [DEF:BackupPlugin:Class]
# @PURPOSE: Implementation of the backup plugin logic.
@@ -110,11 +112,12 @@ class BackupPlugin(PluginBase):
# [/DEF:get_schema:Function]
# [DEF:execute:Function]
# @PURPOSE: Executes the dashboard backup logic.
# @PURPOSE: Executes the dashboard backup logic with TaskContext support.
# @PARAM: params (Dict[str, Any]) - Backup parameters (env, backup_path).
# @PARAM: context (Optional[TaskContext]) - Task context for logging with source attribution.
# @PRE: Target environment must be configured. params must be a dictionary.
# @POST: All dashboards are exported and archived.
async def execute(self, params: Dict[str, Any]):
async def execute(self, params: Dict[str, Any], context: Optional[TaskContext] = None):
with belief_scope("execute"):
config_manager = get_config_manager()
env_id = params.get("environment_id")
@@ -133,8 +136,14 @@ class BackupPlugin(PluginBase):
# Use 'backups' subfolder within the storage root
backup_path = Path(storage_settings.root_path) / "backups"
from ..core.logger import logger as app_logger
app_logger.info(f"[BackupPlugin][Entry] Starting backup for {env}.")
# Use TaskContext logger if available, otherwise fall back to app_logger
log = context.logger if context else app_logger
# Create sub-loggers for different components
superset_log = log.with_source("superset_api") if context else log
storage_log = log.with_source("storage") if context else log
log.info(f"Starting backup for environment: {env}")
try:
config_manager = get_config_manager()
@@ -148,24 +157,30 @@ class BackupPlugin(PluginBase):
client = SupersetClient(env_config)
dashboard_count, dashboard_meta = client.get_dashboards()
app_logger.info(f"[BackupPlugin][Progress] Found {dashboard_count} dashboards to export in {env}.")
superset_log.info(f"Found {dashboard_count} dashboards to export")
if dashboard_count == 0:
app_logger.info("[BackupPlugin][Exit] No dashboards to back up.")
log.info("No dashboards to back up")
return
for db in dashboard_meta:
total = len(dashboard_meta)
for idx, db in enumerate(dashboard_meta, 1):
dashboard_id = db.get('id')
dashboard_title = db.get('dashboard_title', 'Unknown Dashboard')
if not dashboard_id:
continue
# Report progress
progress_pct = (idx / total) * 100
log.progress(f"Backing up dashboard: {dashboard_title}", percent=progress_pct)
try:
dashboard_base_dir_name = sanitize_filename(f"{dashboard_title}")
dashboard_dir = backup_path / env.upper() / dashboard_base_dir_name
dashboard_dir.mkdir(parents=True, exist_ok=True)
zip_content, filename = client.export_dashboard(dashboard_id)
superset_log.debug(f"Exported dashboard: {dashboard_title}")
save_and_unpack_dashboard(
zip_content=zip_content,
@@ -175,18 +190,19 @@ class BackupPlugin(PluginBase):
)
archive_exports(str(dashboard_dir), policy=RetentionPolicy())
storage_log.debug(f"Archived dashboard: {dashboard_title}")
except (SupersetAPIError, RequestException, IOError, OSError) as db_error:
app_logger.error(f"[BackupPlugin][Failure] Failed to export dashboard {dashboard_title} (ID: {dashboard_id}): {db_error}", exc_info=True)
log.error(f"Failed to export dashboard {dashboard_title} (ID: {dashboard_id}): {db_error}")
continue
consolidate_archive_folders(backup_path / env.upper())
remove_empty_directories(str(backup_path / env.upper()))
app_logger.info(f"[BackupPlugin][CoherenceCheck:Passed] Backup logic completed for {env}.")
log.info(f"Backup completed successfully for {env}")
except (RequestException, IOError, KeyError) as e:
app_logger.critical(f"[BackupPlugin][Failure] Fatal error during backup for {env}: {e}", exc_info=True)
log.error(f"Fatal error during backup for {env}: {e}")
raise e
# [/DEF:execute:Function]
# [/DEF:BackupPlugin:Class]

View File

@@ -3,6 +3,7 @@
# @PURPOSE: Implements a plugin for system diagnostics and debugging Superset API responses.
# @LAYER: Plugins
# @RELATION: Inherits from PluginBase. Uses SupersetClient from core.
# @RELATION: USES -> TaskContext
# @CONSTRAINT: Must use belief_scope for logging.
# [SECTION: IMPORTS]
@@ -10,6 +11,7 @@ from typing import Dict, Any, Optional
from ..core.plugin_base import PluginBase
from ..core.superset_client import SupersetClient
from ..core.logger import logger, belief_scope
from ..core.task_manager.context import TaskContext
# [/SECTION]
# [DEF:DebugPlugin:Class]
@@ -114,20 +116,29 @@ class DebugPlugin(PluginBase):
# [/DEF:get_schema:Function]
# [DEF:execute:Function]
# @PURPOSE: Executes the debug logic.
# @PURPOSE: Executes the debug logic with TaskContext support.
# @PARAM: params (Dict[str, Any]) - Debug parameters.
# @PARAM: context (Optional[TaskContext]) - Task context for logging with source attribution.
# @PRE: action must be provided in params.
# @POST: Debug action is executed and results returned.
# @RETURN: Dict[str, Any] - Execution results.
async def execute(self, params: Dict[str, Any]) -> Dict[str, Any]:
async def execute(self, params: Dict[str, Any], context: Optional[TaskContext] = None) -> Dict[str, Any]:
with belief_scope("execute"):
action = params.get("action")
# Use TaskContext logger if available, otherwise fall back to app logger
log = context.logger if context else logger
debug_log = log.with_source("debug") if context else log
superset_log = log.with_source("superset_api") if context else log
debug_log.info(f"Executing debug action: {action}")
if action == "test-db-api":
return await self._test_db_api(params)
return await self._test_db_api(params, superset_log)
elif action == "get-dataset-structure":
return await self._get_dataset_structure(params)
return await self._get_dataset_structure(params, superset_log)
else:
debug_log.error(f"Unknown action: {action}")
raise ValueError(f"Unknown action: {action}")
# [/DEF:execute:Function]
@@ -136,33 +147,37 @@ class DebugPlugin(PluginBase):
# @PRE: source_env and target_env params exist in params.
# @POST: Returns DB counts for both envs.
# @PARAM: params (Dict) - Plugin parameters.
# @PARAM: log - Logger instance for superset_api source.
# @RETURN: Dict - Comparison results.
async def _test_db_api(self, params: Dict[str, Any]) -> Dict[str, Any]:
async def _test_db_api(self, params: Dict[str, Any], log) -> Dict[str, Any]:
with belief_scope("_test_db_api"):
source_env_name = params.get("source_env")
target_env_name = params.get("target_env")
target_env_name = params.get("target_env")
if not source_env_name or not target_env_name:
raise ValueError("source_env and target_env are required for test-db-api")
if not source_env_name or not target_env_name:
raise ValueError("source_env and target_env are required for test-db-api")
from ..dependencies import get_config_manager
config_manager = get_config_manager()
from ..dependencies import get_config_manager
config_manager = get_config_manager()
results = {}
for name in [source_env_name, target_env_name]:
env_config = config_manager.get_environment(name)
if not env_config:
raise ValueError(f"Environment '{name}' not found.")
results = {}
for name in [source_env_name, target_env_name]:
log.info(f"Testing database API for environment: {name}")
env_config = config_manager.get_environment(name)
if not env_config:
log.error(f"Environment '{name}' not found.")
raise ValueError(f"Environment '{name}' not found.")
client = SupersetClient(env_config)
client.authenticate()
count, dbs = client.get_databases()
results[name] = {
"count": count,
"databases": dbs
}
client = SupersetClient(env_config)
client.authenticate()
count, dbs = client.get_databases()
log.debug(f"Found {count} databases in {name}")
results[name] = {
"count": count,
"databases": dbs
}
return results
return results
# [/DEF:_test_db_api:Function]
# [DEF:_get_dataset_structure:Function]
@@ -170,26 +185,31 @@ class DebugPlugin(PluginBase):
# @PRE: env and dataset_id params exist in params.
# @POST: Returns dataset JSON structure.
# @PARAM: params (Dict) - Plugin parameters.
# @PARAM: log - Logger instance for superset_api source.
# @RETURN: Dict - Dataset structure.
async def _get_dataset_structure(self, params: Dict[str, Any]) -> Dict[str, Any]:
async def _get_dataset_structure(self, params: Dict[str, Any], log) -> Dict[str, Any]:
with belief_scope("_get_dataset_structure"):
env_name = params.get("env")
dataset_id = params.get("dataset_id")
dataset_id = params.get("dataset_id")
if not env_name or dataset_id is None:
raise ValueError("env and dataset_id are required for get-dataset-structure")
if not env_name or dataset_id is None:
raise ValueError("env and dataset_id are required for get-dataset-structure")
from ..dependencies import get_config_manager
config_manager = get_config_manager()
env_config = config_manager.get_environment(env_name)
if not env_config:
raise ValueError(f"Environment '{env_name}' not found.")
log.info(f"Fetching structure for dataset {dataset_id} in {env_name}")
client = SupersetClient(env_config)
client.authenticate()
from ..dependencies import get_config_manager
config_manager = get_config_manager()
env_config = config_manager.get_environment(env_name)
if not env_config:
log.error(f"Environment '{env_name}' not found.")
raise ValueError(f"Environment '{env_name}' not found.")
client = SupersetClient(env_config)
client.authenticate()
dataset_response = client.get_dataset(dataset_id)
return dataset_response.get('result') or {}
dataset_response = client.get_dataset(dataset_id)
log.debug(f"Retrieved dataset structure for {dataset_id}")
return dataset_response.get('result') or {}
# [/DEF:_get_dataset_structure:Function]
# [/DEF:DebugPlugin:Class]

View File

@@ -61,6 +61,7 @@ class GitLLMExtension:
return "Update dashboard configurations (LLM generation failed)"
return response.choices[0].message.content.strip()
# [/DEF:suggest_commit_message:Function]
# [/DEF:GitLLMExtension:Class]
# [/DEF:backend/src/plugins/git/llm_extension:Module]

View File

@@ -7,6 +7,7 @@
# @RELATION: USES -> src.services.git_service.GitService
# @RELATION: USES -> src.core.superset_client.SupersetClient
# @RELATION: USES -> src.core.config_manager.ConfigManager
# @RELATION: USES -> TaskContext
#
# @INVARIANT: Все операции с Git должны выполняться через GitService.
# @CONSTRAINT: Плагин работает только с распакованными YAML-экспортами Superset.
@@ -20,9 +21,10 @@ from pathlib import Path
from typing import Dict, Any, Optional
from src.core.plugin_base import PluginBase
from src.services.git_service import GitService
from src.core.logger import logger, belief_scope
from src.core.logger import logger as app_logger, belief_scope
from src.core.config_manager import ConfigManager
from src.core.superset_client import SupersetClient
from src.core.task_manager.context import TaskContext
# [/SECTION]
# [DEF:GitPlugin:Class]
@@ -35,7 +37,7 @@ class GitPlugin(PluginBase):
# @POST: Инициализированы git_service и config_manager.
def __init__(self):
with belief_scope("GitPlugin.__init__"):
logger.info("[GitPlugin.__init__][Entry] Initializing GitPlugin.")
app_logger.info("Initializing GitPlugin.")
self.git_service = GitService()
# Robust config path resolution:
@@ -50,13 +52,13 @@ class GitPlugin(PluginBase):
try:
from src.dependencies import config_manager
self.config_manager = config_manager
logger.info("[GitPlugin.__init__][Exit] GitPlugin initialized using shared config_manager.")
app_logger.info("GitPlugin initialized using shared config_manager.")
return
except:
config_path = "config.json"
self.config_manager = ConfigManager(config_path)
logger.info(f"[GitPlugin.__init__][Exit] GitPlugin initialized with {config_path}")
app_logger.info(f"GitPlugin initialized with {config_path}")
# [/DEF:__init__:Function]
@property
@@ -136,33 +138,41 @@ class GitPlugin(PluginBase):
logger.info("[GitPlugin.initialize][Action] Initializing Git Integration Plugin logic.")
# [DEF:execute:Function]
# @PURPOSE: Основной метод выполнения задач плагина.
# @PURPOSE: Основной метод выполнения задач плагина с поддержкой TaskContext.
# @PRE: task_data содержит 'operation' и 'dashboard_id'.
# @POST: Возвращает результат выполнения операции.
# @PARAM: task_data (Dict[str, Any]) - Данные задачи.
# @PARAM: context (Optional[TaskContext]) - Task context for logging with source attribution.
# @RETURN: Dict[str, Any] - Статус и сообщение.
# @RELATION: CALLS -> self._handle_sync
# @RELATION: CALLS -> self._handle_deploy
async def execute(self, task_data: Dict[str, Any]) -> Dict[str, Any]:
async def execute(self, task_data: Dict[str, Any], context: Optional[TaskContext] = None) -> Dict[str, Any]:
with belief_scope("GitPlugin.execute"):
operation = task_data.get("operation")
dashboard_id = task_data.get("dashboard_id")
logger.info(f"[GitPlugin.execute][Entry] Executing operation: {operation} for dashboard {dashboard_id}")
# Use TaskContext logger if available, otherwise fall back to app_logger
log = context.logger if context else app_logger
# Create sub-loggers for different components
git_log = log.with_source("git") if context else log
superset_log = log.with_source("superset_api") if context else log
log.info(f"Executing operation: {operation} for dashboard {dashboard_id}")
if operation == "sync":
source_env_id = task_data.get("source_env_id")
result = await self._handle_sync(dashboard_id, source_env_id)
result = await self._handle_sync(dashboard_id, source_env_id, log, git_log, superset_log)
elif operation == "deploy":
env_id = task_data.get("environment_id")
result = await self._handle_deploy(dashboard_id, env_id)
result = await self._handle_deploy(dashboard_id, env_id, log, git_log, superset_log)
elif operation == "history":
result = {"status": "success", "message": "History available via API"}
else:
logger.error(f"[GitPlugin.execute][Coherence:Failed] Unknown operation: {operation}")
log.error(f"Unknown operation: {operation}")
raise ValueError(f"Unknown operation: {operation}")
logger.info(f"[GitPlugin.execute][Exit] Operation {operation} completed.")
log.info(f"Operation {operation} completed.")
return result
# [/DEF:execute:Function]
@@ -176,13 +186,13 @@ class GitPlugin(PluginBase):
# @SIDE_EFFECT: Изменяет файлы в локальной рабочей директории репозитория.
# @RELATION: CALLS -> src.services.git_service.GitService.get_repo
# @RELATION: CALLS -> src.core.superset_client.SupersetClient.export_dashboard
async def _handle_sync(self, dashboard_id: int, source_env_id: Optional[str] = None) -> Dict[str, str]:
async def _handle_sync(self, dashboard_id: int, source_env_id: Optional[str] = None, log=None, git_log=None, superset_log=None) -> Dict[str, str]:
with belief_scope("GitPlugin._handle_sync"):
try:
# 1. Получение репозитория
repo = self.git_service.get_repo(dashboard_id)
repo_path = Path(repo.working_dir)
logger.info(f"[_handle_sync][Action] Target repo path: {repo_path}")
git_log.info(f"Target repo path: {repo_path}")
# 2. Настройка клиента Superset
env = self._get_env(source_env_id)
@@ -190,11 +200,11 @@ class GitPlugin(PluginBase):
client.authenticate()
# 3. Экспорт дашборда
logger.info(f"[_handle_sync][Action] Exporting dashboard {dashboard_id} from {env.name}")
superset_log.info(f"Exporting dashboard {dashboard_id} from {env.name}")
zip_bytes, _ = client.export_dashboard(dashboard_id)
# 4. Распаковка с выравниванием структуры (flattening)
logger.info(f"[_handle_sync][Action] Unpacking export to {repo_path}")
git_log.info(f"Unpacking export to {repo_path}")
# Список папок/файлов, которые мы ожидаем от Superset
managed_dirs = ["dashboards", "charts", "datasets", "databases"]
@@ -218,7 +228,7 @@ class GitPlugin(PluginBase):
raise ValueError("Export ZIP is empty")
root_folder = namelist[0].split('/')[0]
logger.info(f"[_handle_sync][Action] Detected root folder in ZIP: {root_folder}")
git_log.info(f"Detected root folder in ZIP: {root_folder}")
for member in zf.infolist():
if member.filename.startswith(root_folder + "/") and len(member.filename) > len(root_folder) + 1:
@@ -254,10 +264,13 @@ class GitPlugin(PluginBase):
# @POST: Дашборд импортирован в целевой Superset.
# @PARAM: dashboard_id (int) - ID дашборда.
# @PARAM: env_id (str) - ID целевого окружения.
# @PARAM: log - Main logger instance.
# @PARAM: git_log - Git-specific logger instance.
# @PARAM: superset_log - Superset API-specific logger instance.
# @RETURN: Dict[str, Any] - Результат деплоя.
# @SIDE_EFFECT: Создает и удаляет временный ZIP-файл.
# @RELATION: CALLS -> src.core.superset_client.SupersetClient.import_dashboard
async def _handle_deploy(self, dashboard_id: int, env_id: str) -> Dict[str, Any]:
async def _handle_deploy(self, dashboard_id: int, env_id: str, log=None, git_log=None, superset_log=None) -> Dict[str, Any]:
with belief_scope("GitPlugin._handle_deploy"):
try:
if not env_id:
@@ -268,7 +281,7 @@ class GitPlugin(PluginBase):
repo_path = Path(repo.working_dir)
# 2. Упаковка в ZIP
logger.info(f"[_handle_deploy][Action] Packing repository {repo_path} for deployment.")
git_log.info(f"Packing repository {repo_path} for deployment.")
zip_buffer = io.BytesIO()
# Superset expects a root directory in the ZIP (e.g., dashboard_export_20240101T000000/)
@@ -297,7 +310,7 @@ class GitPlugin(PluginBase):
# 4. Импорт
temp_zip_path = repo_path / f"deploy_{dashboard_id}.zip"
logger.info(f"[_handle_deploy][Action] Saving temporary zip to {temp_zip_path}")
git_log.info(f"Saving temporary zip to {temp_zip_path}")
with open(temp_zip_path, "wb") as f:
f.write(zip_buffer.getvalue())

View File

@@ -7,6 +7,7 @@
# @RELATION: CALLS -> backend.src.plugins.llm_analysis.service.ScreenshotService
# @RELATION: CALLS -> backend.src.plugins.llm_analysis.service.LLMClient
# @RELATION: CALLS -> backend.src.services.llm_provider.LLMProviderService
# @RELATION: USES -> TaskContext
# @INVARIANT: All LLM interactions must be executed as asynchronous tasks.
from typing import Dict, Any, Optional, List
@@ -23,6 +24,7 @@ from ...core.superset_client import SupersetClient
from .service import ScreenshotService, LLMClient
from .models import LLMProviderType, ValidationStatus, ValidationResult, DetectedIssue
from ...models.llm import ValidationRecord
from ...core.task_manager.context import TaskContext
# [DEF:DashboardValidationPlugin:Class]
# @PURPOSE: Plugin for automated dashboard health analysis using LLMs.
@@ -56,28 +58,27 @@ class DashboardValidationPlugin(PluginBase):
}
# [DEF:DashboardValidationPlugin.execute:Function]
# @PURPOSE: Executes the dashboard validation task.
# @PURPOSE: Executes the dashboard validation task with TaskContext support.
# @PARAM: params (Dict[str, Any]) - Validation parameters.
# @PARAM: context (Optional[TaskContext]) - Task context for logging with source attribution.
# @PRE: params contains dashboard_id, environment_id, and provider_id.
# @POST: Returns a dictionary with validation results and persists them to the database.
# @SIDE_EFFECT: Captures a screenshot, calls LLM API, and writes to the database.
async def execute(self, params: Dict[str, Any]):
async def execute(self, params: Dict[str, Any], context: Optional[TaskContext] = None):
with belief_scope("execute", f"plugin_id={self.id}"):
logger.info(f"Executing {self.name} with params: {params}")
# Use TaskContext logger if available, otherwise fall back to app logger
log = context.logger if context else logger
# Create sub-loggers for different components
llm_log = log.with_source("llm") if context else log
screenshot_log = log.with_source("screenshot") if context else log
superset_log = log.with_source("superset_api") if context else log
log.info(f"Executing {self.name} with params: {params}")
dashboard_id = params.get("dashboard_id")
env_id = params.get("environment_id")
provider_id = params.get("provider_id")
task_id = params.get("_task_id")
# Helper to log to both app logger and task manager logs
def task_log(level: str, message: str, context: Optional[Dict] = None):
logger.log(getattr(logging, level.upper()), message)
if task_id:
from ...dependencies import get_task_manager
try:
tm = get_task_manager()
tm._add_log(task_id, level.upper(), message, context)
except: pass
db = SessionLocal()
try:
@@ -86,25 +87,26 @@ class DashboardValidationPlugin(PluginBase):
config_mgr = get_config_manager()
env = config_mgr.get_environment(env_id)
if not env:
log.error(f"Environment {env_id} not found")
raise ValueError(f"Environment {env_id} not found")
# 2. Get LLM Provider
llm_service = LLMProviderService(db)
db_provider = llm_service.get_provider(provider_id)
if not db_provider:
log.error(f"LLM Provider {provider_id} not found")
raise ValueError(f"LLM Provider {provider_id} not found")
logger.info(f"[DashboardValidationPlugin.execute] Retrieved provider config:")
logger.info(f"[DashboardValidationPlugin.execute] Provider ID: {db_provider.id}")
logger.info(f"[DashboardValidationPlugin.execute] Provider Name: {db_provider.name}")
logger.info(f"[DashboardValidationPlugin.execute] Provider Type: {db_provider.provider_type}")
logger.info(f"[DashboardValidationPlugin.execute] Base URL: {db_provider.base_url}")
logger.info(f"[DashboardValidationPlugin.execute] Default Model: {db_provider.default_model}")
logger.info(f"[DashboardValidationPlugin.execute] Is Active: {db_provider.is_active}")
llm_log.debug(f"Retrieved provider config:")
llm_log.debug(f" Provider ID: {db_provider.id}")
llm_log.debug(f" Provider Name: {db_provider.name}")
llm_log.debug(f" Provider Type: {db_provider.provider_type}")
llm_log.debug(f" Base URL: {db_provider.base_url}")
llm_log.debug(f" Default Model: {db_provider.default_model}")
llm_log.debug(f" Is Active: {db_provider.is_active}")
api_key = llm_service.get_decrypted_api_key(provider_id)
logger.info(f"[DashboardValidationPlugin.execute] API Key decrypted (first 8 chars): {api_key[:8] if api_key and len(api_key) > 8 else 'EMPTY_OR_NONE'}...")
logger.info(f"[DashboardValidationPlugin.execute] API Key Length: {len(api_key) if api_key else 0}")
llm_log.debug(f"API Key decrypted (first 8 chars): {api_key[:8] if api_key and len(api_key) > 8 else 'EMPTY_OR_NONE'}...")
# Check if API key was successfully decrypted
if not api_key:
@@ -124,7 +126,9 @@ class DashboardValidationPlugin(PluginBase):
filename = f"{dashboard_id}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.png"
screenshot_path = os.path.join(screenshots_dir, filename)
screenshot_log.info(f"Capturing screenshot for dashboard {dashboard_id}")
await screenshot_service.capture_dashboard(dashboard_id, screenshot_path)
screenshot_log.debug(f"Screenshot saved to: {screenshot_path}")
# 4. Fetch Logs (from Environment /api/v1/log/)
logs = []
@@ -147,6 +151,7 @@ class DashboardValidationPlugin(PluginBase):
"page_size": 100
}
superset_log.debug(f"Fetching logs for dashboard {dashboard_id}")
response = client.network.request(
method="GET",
endpoint="/log/",
@@ -162,9 +167,10 @@ class DashboardValidationPlugin(PluginBase):
if not logs:
logs = ["No recent logs found for this dashboard."]
superset_log.debug("No recent logs found for this dashboard")
except Exception as e:
logger.warning(f"Failed to fetch logs from environment: {e}")
superset_log.warning(f"Failed to fetch logs from environment: {e}")
logs = [f"Error fetching remote logs: {str(e)}"]
# 5. Analyze with LLM
@@ -175,14 +181,15 @@ class DashboardValidationPlugin(PluginBase):
default_model=db_provider.default_model
)
llm_log.info(f"Analyzing dashboard {dashboard_id} with LLM")
analysis = await llm_client.analyze_dashboard(screenshot_path, logs)
# Log analysis summary to task logs for better visibility
task_log("INFO", f"[ANALYSIS_SUMMARY] Status: {analysis['status']}")
task_log("INFO", f"[ANALYSIS_SUMMARY] Summary: {analysis['summary']}")
llm_log.info(f"[ANALYSIS_SUMMARY] Status: {analysis['status']}")
llm_log.info(f"[ANALYSIS_SUMMARY] Summary: {analysis['summary']}")
if analysis.get("issues"):
for i, issue in enumerate(analysis["issues"]):
task_log("INFO", f"[ANALYSIS_ISSUE][{i+1}] {issue.get('severity')}: {issue.get('message')} (Location: {issue.get('location', 'N/A')})")
llm_log.info(f"[ANALYSIS_ISSUE][{i+1}] {issue.get('severity')}: {issue.get('message')} (Location: {issue.get('location', 'N/A')})")
# 6. Persist Result
validation_result = ValidationResult(
@@ -207,13 +214,13 @@ class DashboardValidationPlugin(PluginBase):
# 7. Notification on failure (US1 / FR-015)
if validation_result.status == ValidationStatus.FAIL:
task_log("WARNING", f"Dashboard {dashboard_id} validation FAILED. Summary: {validation_result.summary}")
log.warning(f"Dashboard {dashboard_id} validation FAILED. Summary: {validation_result.summary}")
# Placeholder for Email/Pulse notification dispatch
# In a real implementation, we would call a NotificationService here
# with a payload containing the summary and a link to the report.
# Final log to ensure all analysis is visible in task logs
task_log("INFO", f"Validation completed for dashboard {dashboard_id}. Status: {validation_result.status.value}")
log.info(f"Validation completed for dashboard {dashboard_id}. Status: {validation_result.status.value}")
return validation_result.dict()
@@ -254,13 +261,22 @@ class DocumentationPlugin(PluginBase):
}
# [DEF:DocumentationPlugin.execute:Function]
# @PURPOSE: Executes the dataset documentation task.
# @PURPOSE: Executes the dataset documentation task with TaskContext support.
# @PARAM: params (Dict[str, Any]) - Documentation parameters.
# @PARAM: context (Optional[TaskContext]) - Task context for logging with source attribution.
# @PRE: params contains dataset_id, environment_id, and provider_id.
# @POST: Returns generated documentation and updates the dataset in Superset.
# @SIDE_EFFECT: Calls LLM API and updates dataset metadata in Superset.
async def execute(self, params: Dict[str, Any]):
async def execute(self, params: Dict[str, Any], context: Optional[TaskContext] = None):
with belief_scope("execute", f"plugin_id={self.id}"):
logger.info(f"Executing {self.name} with params: {params}")
# Use TaskContext logger if available, otherwise fall back to app logger
log = context.logger if context else logger
# Create sub-loggers for different components
llm_log = log.with_source("llm") if context else log
superset_log = log.with_source("superset_api") if context else log
log.info(f"Executing {self.name} with params: {params}")
dataset_id = params.get("dataset_id")
env_id = params.get("environment_id")
@@ -273,25 +289,25 @@ class DocumentationPlugin(PluginBase):
config_mgr = get_config_manager()
env = config_mgr.get_environment(env_id)
if not env:
log.error(f"Environment {env_id} not found")
raise ValueError(f"Environment {env_id} not found")
# 2. Get LLM Provider
llm_service = LLMProviderService(db)
db_provider = llm_service.get_provider(provider_id)
if not db_provider:
log.error(f"LLM Provider {provider_id} not found")
raise ValueError(f"LLM Provider {provider_id} not found")
logger.info(f"[DocumentationPlugin.execute] Retrieved provider config:")
logger.info(f"[DocumentationPlugin.execute] Provider ID: {db_provider.id}")
logger.info(f"[DocumentationPlugin.execute] Provider Name: {db_provider.name}")
logger.info(f"[DocumentationPlugin.execute] Provider Type: {db_provider.provider_type}")
logger.info(f"[DocumentationPlugin.execute] Base URL: {db_provider.base_url}")
logger.info(f"[DocumentationPlugin.execute] Default Model: {db_provider.default_model}")
logger.info(f"[DocumentationPlugin.execute] Is Active: {db_provider.is_active}")
llm_log.debug(f"Retrieved provider config:")
llm_log.debug(f" Provider ID: {db_provider.id}")
llm_log.debug(f" Provider Name: {db_provider.name}")
llm_log.debug(f" Provider Type: {db_provider.provider_type}")
llm_log.debug(f" Base URL: {db_provider.base_url}")
llm_log.debug(f" Default Model: {db_provider.default_model}")
api_key = llm_service.get_decrypted_api_key(provider_id)
logger.info(f"[DocumentationPlugin.execute] API Key decrypted (first 8 chars): {api_key[:8] if api_key and len(api_key) > 8 else 'EMPTY_OR_NONE'}...")
logger.info(f"[DocumentationPlugin.execute] API Key Length: {len(api_key) if api_key else 0}")
llm_log.debug(f"API Key decrypted (first 8 chars): {api_key[:8] if api_key and len(api_key) > 8 else 'EMPTY_OR_NONE'}...")
# Check if API key was successfully decrypted
if not api_key:
@@ -305,10 +321,8 @@ class DocumentationPlugin(PluginBase):
from ...core.superset_client import SupersetClient
client = SupersetClient(env)
# Optimistic locking check (T045)
superset_log.debug(f"Fetching dataset {dataset_id}")
dataset = client.get_dataset(int(dataset_id))
# dataset structure might vary, ensure we get the right field
original_changed_on = dataset.get("changed_on_utc") or dataset.get("result", {}).get("changed_on_utc")
# Extract columns and existing descriptions
columns_data = []
@@ -318,6 +332,7 @@ class DocumentationPlugin(PluginBase):
"type": col.get("type"),
"description": col.get("description")
})
superset_log.debug(f"Extracted {len(columns_data)} columns from dataset")
# 4. Construct Prompt & Analyze (US2 / T025)
llm_client = LLMClient(
@@ -345,12 +360,10 @@ class DocumentationPlugin(PluginBase):
"""
# Using a generic chat completion for text-only US2
# We use the shared get_json_completion method from LLMClient
llm_log.info(f"Generating documentation for dataset {dataset_id}")
doc_result = await llm_client.get_json_completion([{"role": "user", "content": prompt}])
# 5. Update Metadata (US2 / T026)
# This part normally goes to mapping_service, but we implement the logic here for the plugin flow
# We'll update the dataset in Superset
update_payload = {
"description": doc_result["dataset_description"],
"columns": []
@@ -365,8 +378,11 @@ class DocumentationPlugin(PluginBase):
"description": col_doc["description"]
})
superset_log.info(f"Updating dataset {dataset_id} with generated documentation")
client.update_dataset(int(dataset_id), update_payload)
log.info(f"Documentation completed for dataset {dataset_id}")
return doc_result
finally:

View File

@@ -39,6 +39,7 @@ def schedule_dashboard_validation(dashboard_id: str, cron_expression: str, param
**_parse_cron(cron_expression)
)
logger.info(f"Scheduled validation for dashboard {dashboard_id} with cron {cron_expression}")
# [/DEF:schedule_dashboard_validation:Function]
# [DEF:_parse_cron:Function]
# @PURPOSE: Basic cron parser placeholder.
@@ -56,5 +57,6 @@ def _parse_cron(cron: str) -> Dict[str, str]:
"month": parts[3],
"day_of_week": parts[4]
}
# [/DEF:_parse_cron:Function]
# [/DEF:backend/src/plugins/llm_analysis/scheduler.py:Module]

View File

@@ -3,6 +3,7 @@
# @PURPOSE: Implements a plugin for mapping dataset columns using external database connections or Excel files.
# @LAYER: Plugins
# @RELATION: Inherits from PluginBase. Uses DatasetMapper from superset_tool.
# @RELATION: USES -> TaskContext
# @CONSTRAINT: Must use belief_scope for logging.
# [SECTION: IMPORTS]
@@ -13,6 +14,7 @@ from ..core.logger import logger, belief_scope
from ..core.database import SessionLocal
from ..models.connection import ConnectionConfig
from ..core.utils.dataset_mapper import DatasetMapper
from ..core.task_manager.context import TaskContext
# [/SECTION]
# [DEF:MapperPlugin:Class]
@@ -128,19 +130,27 @@ class MapperPlugin(PluginBase):
# [/DEF:get_schema:Function]
# [DEF:execute:Function]
# @PURPOSE: Executes the dataset mapping logic.
# @PURPOSE: Executes the dataset mapping logic with TaskContext support.
# @PARAM: params (Dict[str, Any]) - Mapping parameters.
# @PARAM: context (Optional[TaskContext]) - Task context for logging with source attribution.
# @PRE: Params contain valid 'env', 'dataset_id', and 'source'. params must be a dictionary.
# @POST: Updates the dataset in Superset.
# @RETURN: Dict[str, Any] - Execution status.
async def execute(self, params: Dict[str, Any]) -> Dict[str, Any]:
async def execute(self, params: Dict[str, Any], context: Optional[TaskContext] = None) -> Dict[str, Any]:
with belief_scope("execute"):
env_name = params.get("env")
dataset_id = params.get("dataset_id")
source = params.get("source")
# Use TaskContext logger if available, otherwise fall back to app logger
log = context.logger if context else logger
# Create sub-loggers for different components
superset_log = log.with_source("superset_api") if context else log
db_log = log.with_source("postgres") if context else log
if not env_name or dataset_id is None or not source:
logger.error("[MapperPlugin.execute][State] Missing required parameters.")
log.error("Missing required parameters: env, dataset_id, source")
raise ValueError("Missing required parameters: env, dataset_id, source")
# Get config and initialize client
@@ -148,7 +158,7 @@ class MapperPlugin(PluginBase):
config_manager = get_config_manager()
env_config = config_manager.get_environment(env_name)
if not env_config:
logger.error(f"[MapperPlugin.execute][State] Environment '{env_name}' not found.")
log.error(f"Environment '{env_name}' not found in configuration.")
raise ValueError(f"Environment '{env_name}' not found in configuration.")
client = SupersetClient(env_config)
@@ -158,7 +168,7 @@ class MapperPlugin(PluginBase):
if source == "postgres":
connection_id = params.get("connection_id")
if not connection_id:
logger.error("[MapperPlugin.execute][State] connection_id is required for postgres source.")
log.error("connection_id is required for postgres source.")
raise ValueError("connection_id is required for postgres source.")
# Load connection from DB
@@ -166,7 +176,7 @@ class MapperPlugin(PluginBase):
try:
conn_config = db.query(ConnectionConfig).filter(ConnectionConfig.id == connection_id).first()
if not conn_config:
logger.error(f"[MapperPlugin.execute][State] Connection {connection_id} not found.")
db_log.error(f"Connection {connection_id} not found.")
raise ValueError(f"Connection {connection_id} not found.")
postgres_config = {
@@ -176,10 +186,11 @@ class MapperPlugin(PluginBase):
'host': conn_config.host,
'port': str(conn_config.port) if conn_config.port else '5432'
}
db_log.debug(f"Loaded connection config for {conn_config.host}:{conn_config.port}/{conn_config.database}")
finally:
db.close()
logger.info(f"[MapperPlugin.execute][Action] Starting mapping for dataset {dataset_id} in {env_name}")
log.info(f"Starting mapping for dataset {dataset_id} in {env_name}")
mapper = DatasetMapper()
@@ -193,10 +204,10 @@ class MapperPlugin(PluginBase):
table_name=params.get("table_name"),
table_schema=params.get("table_schema") or "public"
)
logger.info(f"[MapperPlugin.execute][Success] Mapping completed for dataset {dataset_id}")
superset_log.info(f"Mapping completed for dataset {dataset_id}")
return {"status": "success", "dataset_id": dataset_id}
except Exception as e:
logger.error(f"[MapperPlugin.execute][Failure] Mapping failed: {e}")
log.error(f"Mapping failed: {e}")
raise
# [/DEF:execute:Function]

View File

@@ -5,20 +5,22 @@
# @RELATION: IMPLEMENTS -> PluginBase
# @RELATION: DEPENDS_ON -> superset_tool.client
# @RELATION: DEPENDS_ON -> superset_tool.utils
# @RELATION: USES -> TaskContext
from typing import Dict, Any, List
from typing import Dict, Any, List, Optional
from pathlib import Path
import zipfile
import re
from ..core.plugin_base import PluginBase
from ..core.logger import belief_scope
from ..core.logger import belief_scope, logger as app_logger
from ..core.superset_client import SupersetClient
from ..core.utils.fileio import create_temp_file, update_yamls, create_dashboard_export
from ..dependencies import get_config_manager
from ..core.migration_engine import MigrationEngine
from ..core.database import SessionLocal
from ..models.mapping import DatabaseMapping, Environment
from ..core.task_manager.context import TaskContext
# [DEF:MigrationPlugin:Class]
# @PURPOSE: Implementation of the migration plugin logic.
@@ -132,11 +134,12 @@ class MigrationPlugin(PluginBase):
# [/DEF:get_schema:Function]
# [DEF:execute:Function]
# @PURPOSE: Executes the dashboard migration logic.
# @PURPOSE: Executes the dashboard migration logic with TaskContext support.
# @PARAM: params (Dict[str, Any]) - Migration parameters.
# @PARAM: context (Optional[TaskContext]) - Task context for logging with source attribution.
# @PRE: Source and target environments must be configured.
# @POST: Selected dashboards are migrated.
async def execute(self, params: Dict[str, Any]):
async def execute(self, params: Dict[str, Any], context: Optional[TaskContext] = None):
with belief_scope("MigrationPlugin.execute"):
source_env_id = params.get("source_env_id")
target_env_id = params.get("target_env_id")
@@ -157,74 +160,15 @@ class MigrationPlugin(PluginBase):
from ..dependencies import get_task_manager
tm = get_task_manager()
class TaskLoggerProxy:
# [DEF:__init__:Function]
# @PURPOSE: Initializes the proxy logger.
# @PRE: None.
# @POST: Instance is initialized.
def __init__(self):
with belief_scope("__init__"):
# Initialize parent with dummy values since we override methods
pass
# [/DEF:__init__:Function]
# [DEF:debug:Function]
# @PURPOSE: Logs a debug message to the task manager.
# @PRE: msg is a string.
# @POST: Log is added to task manager if task_id exists.
def debug(self, msg, *args, extra=None, **kwargs):
with belief_scope("debug"):
if task_id: tm._add_log(task_id, "DEBUG", msg, extra or {})
# [/DEF:debug:Function]
# [DEF:info:Function]
# @PURPOSE: Logs an info message to the task manager.
# @PRE: msg is a string.
# @POST: Log is added to task manager if task_id exists.
def info(self, msg, *args, extra=None, **kwargs):
with belief_scope("info"):
if task_id: tm._add_log(task_id, "INFO", msg, extra or {})
# [/DEF:info:Function]
# [DEF:warning:Function]
# @PURPOSE: Logs a warning message to the task manager.
# @PRE: msg is a string.
# @POST: Log is added to task manager if task_id exists.
def warning(self, msg, *args, extra=None, **kwargs):
with belief_scope("warning"):
if task_id: tm._add_log(task_id, "WARNING", msg, extra or {})
# [/DEF:warning:Function]
# [DEF:error:Function]
# @PURPOSE: Logs an error message to the task manager.
# @PRE: msg is a string.
# @POST: Log is added to task manager if task_id exists.
def error(self, msg, *args, extra=None, **kwargs):
with belief_scope("error"):
if task_id: tm._add_log(task_id, "ERROR", msg, extra or {})
# [/DEF:error:Function]
# [DEF:critical:Function]
# @PURPOSE: Logs a critical message to the task manager.
# @PRE: msg is a string.
# @POST: Log is added to task manager if task_id exists.
def critical(self, msg, *args, extra=None, **kwargs):
with belief_scope("critical"):
if task_id: tm._add_log(task_id, "ERROR", msg, extra or {})
# [/DEF:critical:Function]
# [DEF:exception:Function]
# @PURPOSE: Logs an exception message to the task manager.
# @PRE: msg is a string.
# @POST: Log is added to task manager if task_id exists.
def exception(self, msg, *args, **kwargs):
with belief_scope("exception"):
if task_id: tm._add_log(task_id, "ERROR", msg, {"exception": True})
# [/DEF:exception:Function]
logger = TaskLoggerProxy()
logger.info(f"[MigrationPlugin][Entry] Starting migration task.")
logger.info(f"[MigrationPlugin][Action] Params: {params}")
# Use TaskContext logger if available, otherwise fall back to app_logger
log = context.logger if context else app_logger
# Create sub-loggers for different components
superset_log = log.with_source("superset_api") if context else log
migration_log = log.with_source("migration") if context else log
log.info("Starting migration task.")
log.debug(f"Params: {params}")
try:
with belief_scope("execute"):
@@ -251,7 +195,7 @@ class MigrationPlugin(PluginBase):
from_env_name = src_env.name
to_env_name = tgt_env.name
logger.info(f"[MigrationPlugin][State] Resolved environments: {from_env_name} -> {to_env_name}")
log.info(f"Resolved environments: {from_env_name} -> {to_env_name}")
from_c = SupersetClient(src_env)
to_c = SupersetClient(tgt_env)
@@ -270,11 +214,11 @@ class MigrationPlugin(PluginBase):
d for d in all_dashboards if re.search(regex_str, d["dashboard_title"], re.IGNORECASE)
]
else:
logger.warning("[MigrationPlugin][State] No selection criteria provided (selected_ids or dashboard_regex).")
log.warning("No selection criteria provided (selected_ids or dashboard_regex).")
return
if not dashboards_to_migrate:
logger.warning("[MigrationPlugin][State] No dashboards found matching criteria.")
log.warning("No dashboards found matching criteria.")
return
# Fetch mappings from database
@@ -292,7 +236,7 @@ class MigrationPlugin(PluginBase):
DatabaseMapping.target_env_id == tgt_env.id
).all()
db_mapping = {m.source_db_uuid: m.target_db_uuid for m in mappings}
logger.info(f"[MigrationPlugin][State] Loaded {len(db_mapping)} database mappings.")
log.info(f"Loaded {len(db_mapping)} database mappings.")
finally:
db.close()
@@ -311,7 +255,7 @@ class MigrationPlugin(PluginBase):
if not success and replace_db_config:
# Signal missing mapping and wait (only if we care about mappings)
if task_id:
logger.info(f"[MigrationPlugin][Action] Pausing for missing mapping in task {task_id}")
log.info(f"Pausing for missing mapping in task {task_id}")
# In a real scenario, we'd pass the missing DB info to the frontend
# For this task, we'll just simulate the wait
await tm.wait_for_resolution(task_id)
@@ -333,9 +277,9 @@ class MigrationPlugin(PluginBase):
if success:
to_c.import_dashboard(file_name=tmp_new_zip, dash_id=dash_id, dash_slug=dash_slug)
else:
logger.error(f"[MigrationPlugin][Failure] Failed to transform ZIP for dashboard {title}")
migration_log.error(f"Failed to transform ZIP for dashboard {title}")
logger.info(f"[MigrationPlugin][Success] Dashboard {title} imported.")
superset_log.info(f"Dashboard {title} imported.")
except Exception as exc:
# Check for password error
error_msg = str(exc)

View File

@@ -3,6 +3,7 @@
# @PURPOSE: Implements a plugin for searching text patterns across all datasets in a specific Superset environment.
# @LAYER: Plugins
# @RELATION: Inherits from PluginBase. Uses SupersetClient from core.
# @RELATION: USES -> TaskContext
# @CONSTRAINT: Must use belief_scope for logging.
# [SECTION: IMPORTS]
@@ -11,6 +12,7 @@ from typing import Dict, Any, List, Optional
from ..core.plugin_base import PluginBase
from ..core.superset_client import SupersetClient
from ..core.logger import logger, belief_scope
from ..core.task_manager.context import TaskContext
# [/SECTION]
# [DEF:SearchPlugin:Class]
@@ -99,18 +101,26 @@ class SearchPlugin(PluginBase):
# [/DEF:get_schema:Function]
# [DEF:execute:Function]
# @PURPOSE: Executes the dataset search logic.
# @PURPOSE: Executes the dataset search logic with TaskContext support.
# @PARAM: params (Dict[str, Any]) - Search parameters.
# @PARAM: context (Optional[TaskContext]) - Task context for logging with source attribution.
# @PRE: Params contain valid 'env' and 'query'.
# @POST: Returns a dictionary with count and results list.
# @RETURN: Dict[str, Any] - Search results.
async def execute(self, params: Dict[str, Any]) -> Dict[str, Any]:
async def execute(self, params: Dict[str, Any], context: Optional[TaskContext] = None) -> Dict[str, Any]:
with belief_scope("SearchPlugin.execute", f"params={params}"):
env_name = params.get("env")
search_query = params.get("query")
# Use TaskContext logger if available, otherwise fall back to app logger
log = context.logger if context else logger
# Create sub-loggers for different components
superset_log = log.with_source("superset_api") if context else log
search_log = log.with_source("search") if context else log
if not env_name or not search_query:
logger.error("[SearchPlugin.execute][State] Missing required parameters.")
log.error("Missing required parameters: env, query")
raise ValueError("Missing required parameters: env, query")
# Get config and initialize client
@@ -118,20 +128,20 @@ class SearchPlugin(PluginBase):
config_manager = get_config_manager()
env_config = config_manager.get_environment(env_name)
if not env_config:
logger.error(f"[SearchPlugin.execute][State] Environment '{env_name}' not found.")
log.error(f"Environment '{env_name}' not found in configuration.")
raise ValueError(f"Environment '{env_name}' not found in configuration.")
client = SupersetClient(env_config)
client.authenticate()
logger.info(f"[SearchPlugin.execute][Action] Searching for pattern: '{search_query}' in environment: {env_name}")
log.info(f"Searching for pattern: '{search_query}' in environment: {env_name}")
try:
# Ported logic from search_script.py
_, datasets = client.get_datasets(query={"columns": ["id", "table_name", "sql", "database", "columns"]})
if not datasets:
logger.warning("[SearchPlugin.execute][State] No datasets found.")
search_log.warning("No datasets found.")
return {"count": 0, "results": []}
pattern = re.compile(search_query, re.IGNORECASE)
@@ -155,17 +165,17 @@ class SearchPlugin(PluginBase):
"full_value": value_str
})
logger.info(f"[SearchPlugin.execute][Success] Found matches in {len(results)} locations.")
search_log.info(f"Found matches in {len(results)} locations.")
return {
"count": len(results),
"results": results
}
except re.error as e:
logger.error(f"[SearchPlugin.execute][Failure] Invalid regex pattern: {e}")
search_log.error(f"Invalid regex pattern: {e}")
raise ValueError(f"Invalid regex pattern: {e}")
except Exception as e:
logger.error(f"[SearchPlugin.execute][Failure] Error during search: {e}")
log.error(f"Error during search: {e}")
raise
# [/DEF:execute:Function]

View File

@@ -5,6 +5,7 @@
# @LAYER: App
# @RELATION: IMPLEMENTS -> PluginBase
# @RELATION: DEPENDS_ON -> backend.src.models.storage
# @RELATION: USES -> TaskContext
#
# @INVARIANT: All file operations must be restricted to the configured storage root.
@@ -20,6 +21,7 @@ from ...core.plugin_base import PluginBase
from ...core.logger import belief_scope, logger
from ...models.storage import StoredFile, FileCategory, StorageConfig
from ...dependencies import get_config_manager
from ...core.task_manager.context import TaskContext
# [/SECTION]
# [DEF:StoragePlugin:Class]
@@ -112,12 +114,21 @@ class StoragePlugin(PluginBase):
# [/DEF:get_schema:Function]
# [DEF:execute:Function]
# @PURPOSE: Executes storage-related tasks (placeholder for PluginBase compliance).
# @PURPOSE: Executes storage-related tasks with TaskContext support.
# @PARAM: params (Dict[str, Any]) - Storage parameters.
# @PARAM: context (Optional[TaskContext]) - Task context for logging with source attribution.
# @PRE: params must match the plugin schema.
# @POST: Task is executed and logged.
async def execute(self, params: Dict[str, Any]):
async def execute(self, params: Dict[str, Any], context: Optional[TaskContext] = None):
with belief_scope("StoragePlugin:execute"):
logger.info(f"[StoragePlugin][Action] Executing with params: {params}")
# Use TaskContext logger if available, otherwise fall back to app logger
log = context.logger if context else logger
# Create sub-loggers for different components
storage_log = log.with_source("storage") if context else log
filesystem_log = log.with_source("filesystem") if context else log
storage_log.info(f"Executing with params: {params}")
# [/DEF:execute:Function]
# [DEF:get_storage_root:Function]

View File

@@ -1,5 +1,6 @@
# [DEF:backend.src.scripts.create_admin:Module]
#
# @TIER: STANDARD
# @SEMANTICS: admin, setup, user, auth, cli
# @PURPOSE: CLI tool for creating the initial admin user.
# @LAYER: Scripts

View File

@@ -15,43 +15,74 @@ from cryptography.fernet import Fernet
import os
# [DEF:EncryptionManager:Class]
# @TIER: CRITICAL
# @PURPOSE: Handles encryption and decryption of sensitive data like API keys.
# @INVARIANT: Uses a secret key from environment or a default one (fallback only for dev).
class EncryptionManager:
# @INVARIANT: Uses a secret key from environment or a default one (fallback only for dev).
# [DEF:EncryptionManager.__init__:Function]
# @PURPOSE: Initialize the encryption manager with a Fernet key.
# @PRE: ENCRYPTION_KEY env var must be set or use default dev key.
# @POST: Fernet instance ready for encryption/decryption.
def __init__(self):
self.key = os.getenv("ENCRYPTION_KEY", "ZcytYzi0iHIl4Ttr-GdAEk117aGRogkGvN3wiTxrPpE=").encode()
self.fernet = Fernet(self.key)
# [/DEF:EncryptionManager.__init__:Function]
# [DEF:EncryptionManager.encrypt:Function]
# @PURPOSE: Encrypt a plaintext string.
# @PRE: data must be a non-empty string.
# @POST: Returns encrypted string.
def encrypt(self, data: str) -> str:
return self.fernet.encrypt(data.encode()).decode()
# [/DEF:EncryptionManager.encrypt:Function]
# [DEF:EncryptionManager.decrypt:Function]
# @PURPOSE: Decrypt an encrypted string.
# @PRE: encrypted_data must be a valid Fernet-encrypted string.
# @POST: Returns original plaintext string.
def decrypt(self, encrypted_data: str) -> str:
return self.fernet.decrypt(encrypted_data.encode()).decode()
# [/DEF:EncryptionManager.decrypt:Function]
# [/DEF:EncryptionManager:Class]
# [DEF:LLMProviderService:Class]
# @TIER: STANDARD
# @PURPOSE: Service to manage LLM provider lifecycle.
class LLMProviderService:
# [DEF:LLMProviderService.__init__:Function]
# @PURPOSE: Initialize the service with database session.
# @PRE: db must be a valid SQLAlchemy Session.
# @POST: Service ready for provider operations.
def __init__(self, db: Session):
self.db = db
self.encryption = EncryptionManager()
# [/DEF:LLMProviderService.__init__:Function]
# [DEF:get_all_providers:Function]
# @TIER: STANDARD
# @PURPOSE: Returns all configured LLM providers.
# @PRE: Database connection must be active.
# @POST: Returns list of all LLMProvider records.
def get_all_providers(self) -> List[LLMProvider]:
with belief_scope("get_all_providers"):
return self.db.query(LLMProvider).all()
# [/DEF:get_all_providers:Function]
# [DEF:get_provider:Function]
# @TIER: STANDARD
# @PURPOSE: Returns a single LLM provider by ID.
# @PRE: provider_id must be a valid string.
# @POST: Returns LLMProvider or None if not found.
def get_provider(self, provider_id: str) -> Optional[LLMProvider]:
with belief_scope("get_provider"):
return self.db.query(LLMProvider).filter(LLMProvider.id == provider_id).first()
# [/DEF:get_provider:Function]
# [DEF:create_provider:Function]
# @TIER: STANDARD
# @PURPOSE: Creates a new LLM provider with encrypted API key.
# @PRE: config must contain valid provider configuration.
# @POST: New provider created and persisted to database.
def create_provider(self, config: LLMProviderConfig) -> LLMProvider:
with belief_scope("create_provider"):
encrypted_key = self.encryption.encrypt(config.api_key)
@@ -70,7 +101,10 @@ class LLMProviderService:
# [/DEF:create_provider:Function]
# [DEF:update_provider:Function]
# @TIER: STANDARD
# @PURPOSE: Updates an existing LLM provider.
# @PRE: provider_id must exist, config must be valid.
# @POST: Provider updated and persisted to database.
def update_provider(self, provider_id: str, config: LLMProviderConfig) -> Optional[LLMProvider]:
with belief_scope("update_provider"):
db_provider = self.get_provider(provider_id)
@@ -92,7 +126,10 @@ class LLMProviderService:
# [/DEF:update_provider:Function]
# [DEF:delete_provider:Function]
# @TIER: STANDARD
# @PURPOSE: Deletes an LLM provider.
# @PRE: provider_id must exist.
# @POST: Provider removed from database.
def delete_provider(self, provider_id: str) -> bool:
with belief_scope("delete_provider"):
db_provider = self.get_provider(provider_id)
@@ -104,7 +141,10 @@ class LLMProviderService:
# [/DEF:delete_provider:Function]
# [DEF:get_decrypted_api_key:Function]
# @TIER: STANDARD
# @PURPOSE: Returns the decrypted API key for a provider.
# @PRE: provider_id must exist with valid encrypted key.
# @POST: Returns decrypted API key or None on failure.
def get_decrypted_api_key(self, provider_id: str) -> Optional[str]:
with belief_scope("get_decrypted_api_key"):
db_provider = self.get_provider(provider_id)