Вроде работает

2026-01-30 11:10:16 +03:00
parent 8044f85ea4
commit 252a8601a9
43 changed files with 1987 additions and 270 deletions
--- a/backend/src/plugins/llm_analysis/plugin.py
+++ b/backend/src/plugins/llm_analysis/plugin.py
@@ -0,0 +1,272 @@
+# [DEF:backend.src.plugins.llm_analysis.plugin:Module]
+# @TIER: STANDARD
+# @SEMANTICS: plugin, llm, analysis, documentation
+# @PURPOSE: Implements DashboardValidationPlugin and DocumentationPlugin.
+# @LAYER: Domain
+# @RELATION: INHERITS_FROM -> backend.src.core.plugin_base.PluginBase
+
+from typing import Dict, Any, Optional, List
+import os
+from datetime import datetime, timedelta
+from ...core.plugin_base import PluginBase
+from ...core.logger import belief_scope, logger
+from ...core.database import SessionLocal
+from ...core.config_manager import ConfigManager
+from ...services.llm_provider import LLMProviderService
+from .service import ScreenshotService, LLMClient
+from .models import LLMProviderType, ValidationStatus, ValidationResult, DetectedIssue
+from ...models.llm import ValidationRecord
+
+# [DEF:DashboardValidationPlugin:Class]
+# @PURPOSE: Plugin for automated dashboard health analysis using LLMs.
+class DashboardValidationPlugin(PluginBase):
+    """Plugin for automated dashboard health analysis using multimodal LLMs.
+
+    ``execute`` captures a screenshot of the requested dashboard, collects the
+    tail of ``backend.log``, passes both to the configured LLM provider and
+    persists the verdict as a ``ValidationRecord``.
+    """
+
+    # Directory (relative to the working directory) that receives screenshots.
+    _SCREENSHOT_DIR = "ss-tools-storage/screenshots"
+    # Log file whose tail is handed to the LLM together with the screenshot.
+    _LOG_FILE = "backend.log"
+    # Number of trailing log lines included in the analysis.
+    _LOG_TAIL_LINES = 100
+
+    @property
+    def id(self) -> str:
+        """Return the unique plugin identifier."""
+        return "llm_dashboard_validation"
+
+    @property
+    def name(self) -> str:
+        """Return the human-readable plugin name."""
+        return "Dashboard LLM Validation"
+
+    @property
+    def description(self) -> str:
+        """Return a one-line description of the plugin."""
+        return "Automated dashboard health analysis using multimodal LLMs."
+
+    @property
+    def version(self) -> str:
+        """Return the plugin version string."""
+        return "1.0.0"
+
+    def get_schema(self) -> Dict[str, Any]:
+        """Return the JSON schema describing the parameters of ``execute``."""
+        return {
+            "type": "object",
+            "properties": {
+                "dashboard_id": {"type": "string", "title": "Dashboard ID"},
+                "environment_id": {"type": "string", "title": "Environment ID"},
+                "provider_id": {"type": "string", "title": "LLM Provider ID"}
+            },
+            "required": ["dashboard_id", "environment_id", "provider_id"]
+        }
+
+    @classmethod
+    def _build_screenshot_path(cls, dashboard_id: Any) -> str:
+        """Build a timestamped screenshot path for ``dashboard_id``.
+
+        The identifier comes from caller-supplied parameters, so every
+        character outside ``[A-Za-z0-9_.-]`` is replaced with ``_``. This keeps
+        path separators from steering the file out of the screenshot directory.
+        Identifiers made only of safe characters are used unchanged.
+        """
+        import re
+
+        safe_id = re.sub(r"[^A-Za-z0-9_.-]", "_", str(dashboard_id))
+        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
+        return f"{cls._SCREENSHOT_DIR}/{safe_id}_{timestamp}.png"
+
+    @staticmethod
+    def _read_log_tail(path: str, max_lines: int) -> List[str]:
+        """Return up to ``max_lines`` trailing lines of ``path``.
+
+        Returns an empty list when the file does not exist. Lines keep their
+        trailing newline, exactly as ``readlines()`` would return them.
+        """
+        from collections import deque
+
+        try:
+            # errors="replace": a stray undecodable byte in the log must not
+            # abort the whole validation run.
+            with open(path, "r", errors="replace") as f:
+                # A bounded deque keeps only the tail instead of loading the
+                # whole (potentially large) log file into memory.
+                return list(deque(f, maxlen=max_lines))
+        except FileNotFoundError:
+            return []
+
+    async def execute(self, params: Dict[str, Any]):
+        """Validate one dashboard with an LLM and persist the result.
+
+        Args:
+            params: Mapping with the keys ``dashboard_id``, ``environment_id``
+                and ``provider_id`` (see ``get_schema``).
+
+        Returns:
+            The ``ValidationResult`` converted with ``.dict()``.
+
+        Raises:
+            ValueError: If the environment or the LLM provider is not found.
+        """
+        with belief_scope("execute", f"plugin_id={self.id}"):
+            logger.info(f"Executing {self.name} with params: {params}")
+
+            dashboard_id = params.get("dashboard_id")
+            env_id = params.get("environment_id")
+            provider_id = params.get("provider_id")
+
+            db = SessionLocal()
+            try:
+                # 1. Get Environment
+                from ...dependencies import get_config_manager
+                config_mgr = get_config_manager()
+                env = config_mgr.get_environment(env_id)
+                if not env:
+                    raise ValueError(f"Environment {env_id} not found")
+
+                # 2. Get LLM Provider
+                llm_service = LLMProviderService(db)
+                db_provider = llm_service.get_provider(provider_id)
+                if not db_provider:
+                    raise ValueError(f"LLM Provider {provider_id} not found")
+
+                api_key = llm_service.get_decrypted_api_key(provider_id)
+
+                # 3. Capture Screenshot
+                screenshot_service = ScreenshotService(env)
+                os.makedirs(self._SCREENSHOT_DIR, exist_ok=True)
+                screenshot_path = self._build_screenshot_path(dashboard_id)
+
+                await screenshot_service.capture_dashboard(dashboard_id, screenshot_path)
+
+                # 4. Fetch Logs (last 100 lines from backend.log)
+                logs = self._read_log_tail(self._LOG_FILE, self._LOG_TAIL_LINES)
+                if not logs:
+                    logs = ["No logs found in backend.log"]
+
+                # 5. Analyze with LLM
+                llm_client = LLMClient(
+                    provider_type=LLMProviderType(db_provider.provider_type),
+                    api_key=api_key,
+                    base_url=db_provider.base_url,
+                    default_model=db_provider.default_model
+                )
+
+                analysis = await llm_client.analyze_dashboard(screenshot_path, logs)
+
+                # 6. Persist Result
+                validation_result = ValidationResult(
+                    dashboard_id=dashboard_id,
+                    status=ValidationStatus(analysis["status"]),
+                    summary=analysis["summary"],
+                    issues=[DetectedIssue(**issue) for issue in analysis["issues"]],
+                    screenshot_path=screenshot_path,
+                    raw_response=str(analysis)
+                )
+
+                db_record = ValidationRecord(
+                    dashboard_id=validation_result.dashboard_id,
+                    status=validation_result.status.value,
+                    summary=validation_result.summary,
+                    issues=[issue.dict() for issue in validation_result.issues],
+                    screenshot_path=validation_result.screenshot_path,
+                    raw_response=validation_result.raw_response
+                )
+                db.add(db_record)
+                db.commit()
+
+                # 7. Notification on failure (US1 / FR-015)
+                if validation_result.status == ValidationStatus.FAIL:
+                    logger.warning(f"Dashboard {dashboard_id} validation FAILED. Summary: {validation_result.summary}")
+                    # Placeholder for Email/Pulse notification dispatch
+                    # In a real implementation, we would call a NotificationService here
+                    # with a payload containing the summary and a link to the report.
+
+                return validation_result.dict()
+
+            finally:
+                # Always release the session, including on the error paths above.
+                db.close()
+# [/DEF:DashboardValidationPlugin:Class]
+
+# [DEF:DocumentationPlugin:Class]
+# @PURPOSE: Plugin for automated dataset documentation using LLMs.
+class DocumentationPlugin(PluginBase):
+    """Plugin for automated dataset and column documentation using LLMs.
+
+    ``execute`` reads a dataset's metadata through ``SupersetClient``, asks the
+    configured LLM provider for descriptions and writes them back to the
+    dataset.
+    """
+
+    @property
+    def id(self) -> str:
+        """Return the unique plugin identifier."""
+        return "llm_documentation"
+
+    @property
+    def name(self) -> str:
+        """Return the human-readable plugin name."""
+        return "Dataset LLM Documentation"
+
+    @property
+    def description(self) -> str:
+        """Return a one-line description of the plugin."""
+        return "Automated dataset and column documentation using LLMs."
+
+    @property
+    def version(self) -> str:
+        """Return the plugin version string."""
+        return "1.0.0"
+
+    def get_schema(self) -> Dict[str, Any]:
+        """Return the JSON schema describing the parameters of ``execute``."""
+        return {
+            "type": "object",
+            "properties": {
+                "dataset_id": {"type": "string", "title": "Dataset ID"},
+                "environment_id": {"type": "string", "title": "Environment ID"},
+                "provider_id": {"type": "string", "title": "LLM Provider ID"}
+            },
+            "required": ["dataset_id", "environment_id", "provider_id"]
+        }
+
+    @staticmethod
+    def _unwrap_dataset(raw: Dict[str, Any]) -> Dict[str, Any]:
+        """Return the mapping that actually holds the dataset fields.
+
+        The response may carry the fields at the top level or nested under
+        ``"result"``. The top level wins whenever it already has ``"columns"``.
+        """
+        nested = raw.get("result")
+        if "columns" not in raw and isinstance(nested, dict):
+            return nested
+        return raw
+
+    @staticmethod
+    def _changed_on(raw: Dict[str, Any]) -> Any:
+        """Return ``changed_on_utc`` from the top level or from ``"result"``."""
+        return raw.get("changed_on_utc") or raw.get("result", {}).get("changed_on_utc")
+
+    async def execute(self, params: Dict[str, Any]):
+        """Generate documentation for one dataset and write it back.
+
+        Args:
+            params: Mapping with the keys ``dataset_id``, ``environment_id``
+                and ``provider_id`` (see ``get_schema``).
+
+        Returns:
+            The parsed JSON document produced by the LLM.
+
+        Raises:
+            ValueError: If the environment or the LLM provider is not found,
+                or the LLM returns an empty response.
+            RuntimeError: If the dataset was modified while the documentation
+                was being generated (optimistic locking, T045).
+        """
+        with belief_scope("execute", f"plugin_id={self.id}"):
+            logger.info(f"Executing {self.name} with params: {params}")
+
+            dataset_id = params.get("dataset_id")
+            env_id = params.get("environment_id")
+            provider_id = params.get("provider_id")
+
+            db = SessionLocal()
+            try:
+                # 1. Get Environment
+                from ...dependencies import get_config_manager
+                config_mgr = get_config_manager()
+                env = config_mgr.get_environment(env_id)
+                if not env:
+                    raise ValueError(f"Environment {env_id} not found")
+
+                # 2. Get LLM Provider
+                llm_service = LLMProviderService(db)
+                db_provider = llm_service.get_provider(provider_id)
+                if not db_provider:
+                    raise ValueError(f"LLM Provider {provider_id} not found")
+
+                api_key = llm_service.get_decrypted_api_key(provider_id)
+
+                # 3. Fetch Metadata (US2 / T024)
+                from ...core.superset_client import SupersetClient
+                client = SupersetClient(env)
+                dataset_pk = int(dataset_id)
+
+                raw_dataset = client.get_dataset(dataset_pk)
+                # Remember the modification marker for the optimistic locking
+                # check (T045) performed right before the update.
+                original_changed_on = self._changed_on(raw_dataset)
+                # The dataset structure might vary; read every field from the
+                # same level so columns are not lost when the response is wrapped.
+                dataset = self._unwrap_dataset(raw_dataset)
+                columns = dataset.get("columns", [])
+
+                # Extract columns and existing descriptions
+                columns_data = [
+                    {
+                        "name": col.get("column_name"),
+                        "type": col.get("type"),
+                        "description": col.get("description")
+                    }
+                    for col in columns
+                ]
+
+                # 4. Construct Prompt & Analyze (US2 / T025)
+                llm_client = LLMClient(
+                    provider_type=LLMProviderType(db_provider.provider_type),
+                    api_key=api_key,
+                    base_url=db_provider.base_url,
+                    default_model=db_provider.default_model
+                )
+
+                prompt = f"""
+                Generate professional documentation for the following dataset and its columns.
+                Dataset: {dataset.get('table_name')}
+                Columns: {columns_data}
+                
+                Provide the documentation in JSON format:
+                {{
+                    "dataset_description": "General description of the dataset",
+                    "column_descriptions": [
+                        {{
+                            "name": "column_name",
+                            "description": "Generated description"
+                        }}
+                    ]
+                }}
+                """
+
+                # Using a generic chat completion for text-only US2
+                response = await llm_client.client.chat.completions.create(
+                    model=db_provider.default_model,
+                    messages=[{"role": "user", "content": prompt}],
+                    response_format={"type": "json_object"}
+                )
+
+                import json
+                content = response.choices[0].message.content
+                if not content:
+                    # json.loads(None) would raise an opaque TypeError.
+                    raise ValueError("LLM returned an empty response")
+                doc_result = json.loads(content)
+
+                # 5. Update Metadata (US2 / T026)
+                # This part normally goes to mapping_service, but we implement the logic here for the plugin flow
+                # We'll update the dataset in Superset
+                update_payload = {
+                    "description": doc_result["dataset_description"],
+                    "columns": []
+                }
+
+                # Map generated descriptions back to column IDs with one dict
+                # lookup per generated entry instead of a nested scan.
+                column_ids = {col.get("column_name"): col.get("id") for col in columns}
+                for col_doc in doc_result.get("column_descriptions") or []:
+                    col_name = col_doc.get("name")
+                    col_description = col_doc.get("description")
+                    # Skip entries for unknown columns or without a description
+                    # rather than failing after the LLM call has been made.
+                    if col_name in column_ids and col_description is not None:
+                        update_payload["columns"].append({
+                            "id": column_ids[col_name],
+                            "description": col_description
+                        })
+
+                # Optimistic locking check (T045): refuse to overwrite a dataset
+                # that changed while the LLM was generating the documentation.
+                # The check is skipped when no modification marker is available.
+                current_changed_on = self._changed_on(client.get_dataset(dataset_pk))
+                if (
+                    original_changed_on
+                    and current_changed_on
+                    and current_changed_on != original_changed_on
+                ):
+                    raise RuntimeError(
+                        f"Dataset {dataset_id} was modified during documentation generation"
+                    )
+
+                # NOTE(review): confirm how SupersetClient.update_dataset treats
+                # columns that are omitted from the payload - TODO verify.
+                client.update_dataset(dataset_pk, update_payload)
+
+                return doc_result
+
+            finally:
+                # Always release the session, including on the error paths above.
+                db.close()
+# [/DEF:DocumentationPlugin:Class]
+
+# [/DEF:backend.src.plugins.llm_analysis.plugin:Module]