semantic update

2026-02-08 22:53:54 +03:00
parent e6087bd3c1
commit 235b0e3c9f
71 changed files with 68034 additions and 62417 deletions
--- a/backend/src/plugins/llm_analysis/plugin.py
+++ b/backend/src/plugins/llm_analysis/plugin.py
@@ -7,6 +7,7 @@
 # @RELATION: CALLS -> backend.src.plugins.llm_analysis.service.ScreenshotService
 # @RELATION: CALLS -> backend.src.plugins.llm_analysis.service.LLMClient
 # @RELATION: CALLS -> backend.src.services.llm_provider.LLMProviderService
+# @RELATION: USES -> TaskContext
 # @INVARIANT: All LLM interactions must be executed as asynchronous tasks.

 from typing import Dict, Any, Optional, List
@@ -23,6 +24,7 @@ from ...core.superset_client import SupersetClient
 from .service import ScreenshotService, LLMClient
 from .models import LLMProviderType, ValidationStatus, ValidationResult, DetectedIssue
 from ...models.llm import ValidationRecord
+from ...core.task_manager.context import TaskContext

 # [DEF:DashboardValidationPlugin:Class]
 # @PURPOSE: Plugin for automated dashboard health analysis using LLMs.
@@ -56,28 +58,27 @@ class DashboardValidationPlugin(PluginBase):
        }

    # [DEF:DashboardValidationPlugin.execute:Function]
-    # @PURPOSE: Executes the dashboard validation task.
+    # @PURPOSE: Executes the dashboard validation task with TaskContext support.
+    # @PARAM: params (Dict[str, Any]) - Validation parameters.
+    # @PARAM: context (Optional[TaskContext]) - Task context for logging with source attribution.
    # @PRE: params contains dashboard_id, environment_id, and provider_id.
    # @POST: Returns a dictionary with validation results and persists them to the database.
    # @SIDE_EFFECT: Captures a screenshot, calls LLM API, and writes to the database.
-    async def execute(self, params: Dict[str, Any]):
+    async def execute(self, params: Dict[str, Any], context: Optional[TaskContext] = None):
        with belief_scope("execute", f"plugin_id={self.id}"):
-            logger.info(f"Executing {self.name} with params: {params}")
+            # Use TaskContext logger if available, otherwise fall back to app logger
+            log = context.logger if context else logger
+            
+            # Create source-tagged sub-loggers (llm, screenshot, superset_api); without a TaskContext they all alias the app logger
+            llm_log = log.with_source("llm") if context else log
+            screenshot_log = log.with_source("screenshot") if context else log
+            superset_log = log.with_source("superset_api") if context else log
+            
+            log.info(f"Executing {self.name} with params: {params}")
            
            dashboard_id = params.get("dashboard_id")
            env_id = params.get("environment_id")
            provider_id = params.get("provider_id")
-            task_id = params.get("_task_id")
-
-            # Helper to log to both app logger and task manager logs
-            def task_log(level: str, message: str, context: Optional[Dict] = None):
-                logger.log(getattr(logging, level.upper()), message)
-                if task_id:
-                    from ...dependencies import get_task_manager
-                    try:
-                        tm = get_task_manager()
-                        tm._add_log(task_id, level.upper(), message, context)
-                    except: pass

            db = SessionLocal()
            try:
@@ -86,25 +87,26 @@ class DashboardValidationPlugin(PluginBase):
                config_mgr = get_config_manager()
                env = config_mgr.get_environment(env_id)
                if not env:
+                    log.error(f"Environment {env_id} not found")
                    raise ValueError(f"Environment {env_id} not found")

                # 2. Get LLM Provider
                llm_service = LLMProviderService(db)
                db_provider = llm_service.get_provider(provider_id)
                if not db_provider:
+                    log.error(f"LLM Provider {provider_id} not found")
                    raise ValueError(f"LLM Provider {provider_id} not found")
                
-                logger.info(f"[DashboardValidationPlugin.execute] Retrieved provider config:")
-                logger.info(f"[DashboardValidationPlugin.execute]   Provider ID: {db_provider.id}")
-                logger.info(f"[DashboardValidationPlugin.execute]   Provider Name: {db_provider.name}")
-                logger.info(f"[DashboardValidationPlugin.execute]   Provider Type: {db_provider.provider_type}")
-                logger.info(f"[DashboardValidationPlugin.execute]   Base URL: {db_provider.base_url}")
-                logger.info(f"[DashboardValidationPlugin.execute]   Default Model: {db_provider.default_model}")
-                logger.info(f"[DashboardValidationPlugin.execute]   Is Active: {db_provider.is_active}")
+                llm_log.debug(f"Retrieved provider config:")
+                llm_log.debug(f"  Provider ID: {db_provider.id}")
+                llm_log.debug(f"  Provider Name: {db_provider.name}")
+                llm_log.debug(f"  Provider Type: {db_provider.provider_type}")
+                llm_log.debug(f"  Base URL: {db_provider.base_url}")
+                llm_log.debug(f"  Default Model: {db_provider.default_model}")
+                llm_log.debug(f"  Is Active: {db_provider.is_active}")
                
                api_key = llm_service.get_decrypted_api_key(provider_id)
-                logger.info(f"[DashboardValidationPlugin.execute]   API Key decrypted (first 8 chars): {api_key[:8] if api_key and len(api_key) > 8 else 'EMPTY_OR_NONE'}...")
-                logger.info(f"[DashboardValidationPlugin.execute]   API Key Length: {len(api_key) if api_key else 0}")
+                llm_log.debug(f"API Key decrypted (first 8 chars): {api_key[:8] if api_key and len(api_key) > 8 else 'EMPTY_OR_NONE'}...")
                
                # Check if API key was successfully decrypted
                if not api_key:
@@ -124,7 +126,9 @@ class DashboardValidationPlugin(PluginBase):
                filename = f"{dashboard_id}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.png"
                screenshot_path = os.path.join(screenshots_dir, filename)
                
+                screenshot_log.info(f"Capturing screenshot for dashboard {dashboard_id}")
                await screenshot_service.capture_dashboard(dashboard_id, screenshot_path)
+                screenshot_log.debug(f"Screenshot saved to: {screenshot_path}")

                # 4. Fetch Logs (from Environment /api/v1/log/)
                logs = []
@@ -147,6 +151,7 @@ class DashboardValidationPlugin(PluginBase):
                        "page_size": 100
                    }
                    
+                    superset_log.debug(f"Fetching logs for dashboard {dashboard_id}")
                    response = client.network.request(
                        method="GET",
                        endpoint="/log/",
@@ -162,9 +167,10 @@ class DashboardValidationPlugin(PluginBase):
                    
                    if not logs:
                        logs = ["No recent logs found for this dashboard."]
+                        superset_log.debug("No recent logs found for this dashboard")

                except Exception as e:
-                    logger.warning(f"Failed to fetch logs from environment: {e}")
+                    superset_log.warning(f"Failed to fetch logs from environment: {e}")
                    logs = [f"Error fetching remote logs: {str(e)}"]

                # 5. Analyze with LLM
@@ -175,14 +181,15 @@ class DashboardValidationPlugin(PluginBase):
                    default_model=db_provider.default_model
                )
                
+                llm_log.info(f"Analyzing dashboard {dashboard_id} with LLM")
                analysis = await llm_client.analyze_dashboard(screenshot_path, logs)
                
                # Log analysis summary to task logs for better visibility
-                task_log("INFO", f"[ANALYSIS_SUMMARY] Status: {analysis['status']}")
-                task_log("INFO", f"[ANALYSIS_SUMMARY] Summary: {analysis['summary']}")
+                llm_log.info(f"[ANALYSIS_SUMMARY] Status: {analysis['status']}")
+                llm_log.info(f"[ANALYSIS_SUMMARY] Summary: {analysis['summary']}")
                if analysis.get("issues"):
                    for i, issue in enumerate(analysis["issues"]):
-                        task_log("INFO", f"[ANALYSIS_ISSUE][{i+1}] {issue.get('severity')}: {issue.get('message')} (Location: {issue.get('location', 'N/A')})")
+                        llm_log.info(f"[ANALYSIS_ISSUE][{i+1}] {issue.get('severity')}: {issue.get('message')} (Location: {issue.get('location', 'N/A')})")

                # 6. Persist Result
                validation_result = ValidationResult(
@@ -207,13 +214,13 @@ class DashboardValidationPlugin(PluginBase):

                # 7. Notification on failure (US1 / FR-015)
                if validation_result.status == ValidationStatus.FAIL:
-                    task_log("WARNING", f"Dashboard {dashboard_id} validation FAILED. Summary: {validation_result.summary}")
+                    log.warning(f"Dashboard {dashboard_id} validation FAILED. Summary: {validation_result.summary}")
                    # Placeholder for Email/Pulse notification dispatch
                    # In a real implementation, we would call a NotificationService here
                    # with a payload containing the summary and a link to the report.

                # Final log to ensure all analysis is visible in task logs
-                task_log("INFO", f"Validation completed for dashboard {dashboard_id}. Status: {validation_result.status.value}")
+                log.info(f"Validation completed for dashboard {dashboard_id}. Status: {validation_result.status.value}")
                
                return validation_result.dict()

@@ -254,13 +261,22 @@ class DocumentationPlugin(PluginBase):
        }

    # [DEF:DocumentationPlugin.execute:Function]
-    # @PURPOSE: Executes the dataset documentation task.
+    # @PURPOSE: Executes the dataset documentation task with TaskContext support.
+    # @PARAM: params (Dict[str, Any]) - Documentation parameters.
+    # @PARAM: context (Optional[TaskContext]) - Task context for logging with source attribution.
    # @PRE: params contains dataset_id, environment_id, and provider_id.
    # @POST: Returns generated documentation and updates the dataset in Superset.
    # @SIDE_EFFECT: Calls LLM API and updates dataset metadata in Superset.
-    async def execute(self, params: Dict[str, Any]):
+    async def execute(self, params: Dict[str, Any], context: Optional[TaskContext] = None):
        with belief_scope("execute", f"plugin_id={self.id}"):
-            logger.info(f"Executing {self.name} with params: {params}")
+            # Use TaskContext logger if available, otherwise fall back to app logger
+            log = context.logger if context else logger
+            
+            # Create source-tagged sub-loggers (llm, superset_api); without a TaskContext they both alias the app logger
+            llm_log = log.with_source("llm") if context else log
+            superset_log = log.with_source("superset_api") if context else log
+            
+            log.info(f"Executing {self.name} with params: {params}")
            
            dataset_id = params.get("dataset_id")
            env_id = params.get("environment_id")
@@ -273,25 +289,25 @@ class DocumentationPlugin(PluginBase):
                config_mgr = get_config_manager()
                env = config_mgr.get_environment(env_id)
                if not env:
+                    log.error(f"Environment {env_id} not found")
                    raise ValueError(f"Environment {env_id} not found")

                # 2. Get LLM Provider
                llm_service = LLMProviderService(db)
                db_provider = llm_service.get_provider(provider_id)
                if not db_provider:
+                    log.error(f"LLM Provider {provider_id} not found")
                    raise ValueError(f"LLM Provider {provider_id} not found")
                
-                logger.info(f"[DocumentationPlugin.execute] Retrieved provider config:")
-                logger.info(f"[DocumentationPlugin.execute]   Provider ID: {db_provider.id}")
-                logger.info(f"[DocumentationPlugin.execute]   Provider Name: {db_provider.name}")
-                logger.info(f"[DocumentationPlugin.execute]   Provider Type: {db_provider.provider_type}")
-                logger.info(f"[DocumentationPlugin.execute]   Base URL: {db_provider.base_url}")
-                logger.info(f"[DocumentationPlugin.execute]   Default Model: {db_provider.default_model}")
-                logger.info(f"[DocumentationPlugin.execute]   Is Active: {db_provider.is_active}")
+                llm_log.debug(f"Retrieved provider config:")
+                llm_log.debug(f"  Provider ID: {db_provider.id}")
+                llm_log.debug(f"  Provider Name: {db_provider.name}")
+                llm_log.debug(f"  Provider Type: {db_provider.provider_type}")
+                llm_log.debug(f"  Base URL: {db_provider.base_url}")
+                llm_log.debug(f"  Default Model: {db_provider.default_model}")
                
                api_key = llm_service.get_decrypted_api_key(provider_id)
-                logger.info(f"[DocumentationPlugin.execute]   API Key decrypted (first 8 chars): {api_key[:8] if api_key and len(api_key) > 8 else 'EMPTY_OR_NONE'}...")
-                logger.info(f"[DocumentationPlugin.execute]   API Key Length: {len(api_key) if api_key else 0}")
+                llm_log.debug(f"API Key decrypted (first 8 chars): {api_key[:8] if api_key and len(api_key) > 8 else 'EMPTY_OR_NONE'}...")
                
                # Check if API key was successfully decrypted
                if not api_key:
@@ -305,10 +321,8 @@ class DocumentationPlugin(PluginBase):
                from ...core.superset_client import SupersetClient
                client = SupersetClient(env)
                
-                # Optimistic locking check (T045)
+                superset_log.debug(f"Fetching dataset {dataset_id}")
                dataset = client.get_dataset(int(dataset_id))
-                # dataset structure might vary, ensure we get the right field
-                original_changed_on = dataset.get("changed_on_utc") or dataset.get("result", {}).get("changed_on_utc")
                
                # Extract columns and existing descriptions
                columns_data = []
@@ -318,6 +332,7 @@ class DocumentationPlugin(PluginBase):
                        "type": col.get("type"),
                        "description": col.get("description")
                    })
+                superset_log.debug(f"Extracted {len(columns_data)} columns from dataset")

                # 4. Construct Prompt & Analyze (US2 / T025)
                llm_client = LLMClient(
@@ -345,12 +360,10 @@ class DocumentationPlugin(PluginBase):
                """
                
                # Using a generic chat completion for text-only US2
-                # We use the shared get_json_completion method from LLMClient
+                llm_log.info(f"Generating documentation for dataset {dataset_id}")
                doc_result = await llm_client.get_json_completion([{"role": "user", "content": prompt}])

                # 5. Update Metadata (US2 / T026)
-                # This part normally goes to mapping_service, but we implement the logic here for the plugin flow
-                # We'll update the dataset in Superset
                update_payload = {
                    "description": doc_result["dataset_description"],
                    "columns": []
@@ -365,8 +378,11 @@ class DocumentationPlugin(PluginBase):
                                "description": col_doc["description"]
                            })

+                superset_log.info(f"Updating dataset {dataset_id} with generated documentation")
                client.update_dataset(int(dataset_id), update_payload)
                
+                log.info(f"Documentation completed for dataset {dataset_id}")
+                
                return doc_result

            finally:
--- a/backend/src/plugins/llm_analysis/scheduler.py
+++ b/backend/src/plugins/llm_analysis/scheduler.py
@@ -39,6 +39,7 @@ def schedule_dashboard_validation(dashboard_id: str, cron_expression: str, param
            **_parse_cron(cron_expression)
        )
        logger.info(f"Scheduled validation for dashboard {dashboard_id} with cron {cron_expression}")
+# [/DEF:schedule_dashboard_validation:Function]

 # [DEF:_parse_cron:Function]
 # @PURPOSE: Basic cron parser placeholder.
@@ -56,5 +57,6 @@ def _parse_cron(cron: str) -> Dict[str, str]:
        "month": parts[3],
        "day_of_week": parts[4]
    }
+# [/DEF:_parse_cron:Function]

 # [/DEF:backend/src/plugins/llm_analysis/scheduler.py:Module]