semantic update

This commit is contained in:
2026-02-08 22:53:54 +03:00
parent e6087bd3c1
commit 235b0e3c9f
71 changed files with 68034 additions and 62417 deletions

View File

@@ -7,6 +7,7 @@
# @RELATION: CALLS -> backend.src.plugins.llm_analysis.service.ScreenshotService
# @RELATION: CALLS -> backend.src.plugins.llm_analysis.service.LLMClient
# @RELATION: CALLS -> backend.src.services.llm_provider.LLMProviderService
# @RELATION: USES -> TaskContext
# @INVARIANT: All LLM interactions must be executed as asynchronous tasks.
from typing import Dict, Any, Optional, List
@@ -23,6 +24,7 @@ from ...core.superset_client import SupersetClient
from .service import ScreenshotService, LLMClient
from .models import LLMProviderType, ValidationStatus, ValidationResult, DetectedIssue
from ...models.llm import ValidationRecord
from ...core.task_manager.context import TaskContext
# [DEF:DashboardValidationPlugin:Class]
# @PURPOSE: Plugin for automated dashboard health analysis using LLMs.
@@ -56,28 +58,27 @@ class DashboardValidationPlugin(PluginBase):
}
# [DEF:DashboardValidationPlugin.execute:Function]
# @PURPOSE: Executes the dashboard validation task.
# @PURPOSE: Executes the dashboard validation task with TaskContext support.
# @PARAM: params (Dict[str, Any]) - Validation parameters.
# @PARAM: context (Optional[TaskContext]) - Task context for logging with source attribution.
# @PRE: params contains dashboard_id, environment_id, and provider_id.
# @POST: Returns a dictionary with validation results and persists them to the database.
# @SIDE_EFFECT: Captures a screenshot, calls LLM API, and writes to the database.
async def execute(self, params: Dict[str, Any]):
async def execute(self, params: Dict[str, Any], context: Optional[TaskContext] = None):
with belief_scope("execute", f"plugin_id={self.id}"):
logger.info(f"Executing {self.name} with params: {params}")
# Use TaskContext logger if available, otherwise fall back to app logger
log = context.logger if context else logger
# Create sub-loggers for different components
llm_log = log.with_source("llm") if context else log
screenshot_log = log.with_source("screenshot") if context else log
superset_log = log.with_source("superset_api") if context else log
log.info(f"Executing {self.name} with params: {params}")
dashboard_id = params.get("dashboard_id")
env_id = params.get("environment_id")
provider_id = params.get("provider_id")
task_id = params.get("_task_id")
# Helper to log to both app logger and task manager logs
def task_log(level: str, message: str, context: Optional[Dict] = None):
logger.log(getattr(logging, level.upper()), message)
if task_id:
from ...dependencies import get_task_manager
try:
tm = get_task_manager()
tm._add_log(task_id, level.upper(), message, context)
except: pass
db = SessionLocal()
try:
@@ -86,25 +87,26 @@ class DashboardValidationPlugin(PluginBase):
config_mgr = get_config_manager()
env = config_mgr.get_environment(env_id)
if not env:
log.error(f"Environment {env_id} not found")
raise ValueError(f"Environment {env_id} not found")
# 2. Get LLM Provider
llm_service = LLMProviderService(db)
db_provider = llm_service.get_provider(provider_id)
if not db_provider:
log.error(f"LLM Provider {provider_id} not found")
raise ValueError(f"LLM Provider {provider_id} not found")
logger.info(f"[DashboardValidationPlugin.execute] Retrieved provider config:")
logger.info(f"[DashboardValidationPlugin.execute] Provider ID: {db_provider.id}")
logger.info(f"[DashboardValidationPlugin.execute] Provider Name: {db_provider.name}")
logger.info(f"[DashboardValidationPlugin.execute] Provider Type: {db_provider.provider_type}")
logger.info(f"[DashboardValidationPlugin.execute] Base URL: {db_provider.base_url}")
logger.info(f"[DashboardValidationPlugin.execute] Default Model: {db_provider.default_model}")
logger.info(f"[DashboardValidationPlugin.execute] Is Active: {db_provider.is_active}")
llm_log.debug(f"Retrieved provider config:")
llm_log.debug(f" Provider ID: {db_provider.id}")
llm_log.debug(f" Provider Name: {db_provider.name}")
llm_log.debug(f" Provider Type: {db_provider.provider_type}")
llm_log.debug(f" Base URL: {db_provider.base_url}")
llm_log.debug(f" Default Model: {db_provider.default_model}")
llm_log.debug(f" Is Active: {db_provider.is_active}")
api_key = llm_service.get_decrypted_api_key(provider_id)
logger.info(f"[DashboardValidationPlugin.execute] API Key decrypted (first 8 chars): {api_key[:8] if api_key and len(api_key) > 8 else 'EMPTY_OR_NONE'}...")
logger.info(f"[DashboardValidationPlugin.execute] API Key Length: {len(api_key) if api_key else 0}")
llm_log.debug(f"API Key decrypted (first 8 chars): {api_key[:8] if api_key and len(api_key) > 8 else 'EMPTY_OR_NONE'}...")
# Check if API key was successfully decrypted
if not api_key:
@@ -124,7 +126,9 @@ class DashboardValidationPlugin(PluginBase):
filename = f"{dashboard_id}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.png"
screenshot_path = os.path.join(screenshots_dir, filename)
screenshot_log.info(f"Capturing screenshot for dashboard {dashboard_id}")
await screenshot_service.capture_dashboard(dashboard_id, screenshot_path)
screenshot_log.debug(f"Screenshot saved to: {screenshot_path}")
# 4. Fetch Logs (from Environment /api/v1/log/)
logs = []
@@ -147,6 +151,7 @@ class DashboardValidationPlugin(PluginBase):
"page_size": 100
}
superset_log.debug(f"Fetching logs for dashboard {dashboard_id}")
response = client.network.request(
method="GET",
endpoint="/log/",
@@ -162,9 +167,10 @@ class DashboardValidationPlugin(PluginBase):
if not logs:
logs = ["No recent logs found for this dashboard."]
superset_log.debug("No recent logs found for this dashboard")
except Exception as e:
logger.warning(f"Failed to fetch logs from environment: {e}")
superset_log.warning(f"Failed to fetch logs from environment: {e}")
logs = [f"Error fetching remote logs: {str(e)}"]
# 5. Analyze with LLM
@@ -175,14 +181,15 @@ class DashboardValidationPlugin(PluginBase):
default_model=db_provider.default_model
)
llm_log.info(f"Analyzing dashboard {dashboard_id} with LLM")
analysis = await llm_client.analyze_dashboard(screenshot_path, logs)
# Log analysis summary to task logs for better visibility
task_log("INFO", f"[ANALYSIS_SUMMARY] Status: {analysis['status']}")
task_log("INFO", f"[ANALYSIS_SUMMARY] Summary: {analysis['summary']}")
llm_log.info(f"[ANALYSIS_SUMMARY] Status: {analysis['status']}")
llm_log.info(f"[ANALYSIS_SUMMARY] Summary: {analysis['summary']}")
if analysis.get("issues"):
for i, issue in enumerate(analysis["issues"]):
task_log("INFO", f"[ANALYSIS_ISSUE][{i+1}] {issue.get('severity')}: {issue.get('message')} (Location: {issue.get('location', 'N/A')})")
llm_log.info(f"[ANALYSIS_ISSUE][{i+1}] {issue.get('severity')}: {issue.get('message')} (Location: {issue.get('location', 'N/A')})")
# 6. Persist Result
validation_result = ValidationResult(
@@ -207,13 +214,13 @@ class DashboardValidationPlugin(PluginBase):
# 7. Notification on failure (US1 / FR-015)
if validation_result.status == ValidationStatus.FAIL:
task_log("WARNING", f"Dashboard {dashboard_id} validation FAILED. Summary: {validation_result.summary}")
log.warning(f"Dashboard {dashboard_id} validation FAILED. Summary: {validation_result.summary}")
# Placeholder for Email/Pulse notification dispatch
# In a real implementation, we would call a NotificationService here
# with a payload containing the summary and a link to the report.
# Final log to ensure all analysis is visible in task logs
task_log("INFO", f"Validation completed for dashboard {dashboard_id}. Status: {validation_result.status.value}")
log.info(f"Validation completed for dashboard {dashboard_id}. Status: {validation_result.status.value}")
return validation_result.dict()
@@ -254,13 +261,22 @@ class DocumentationPlugin(PluginBase):
}
# [DEF:DocumentationPlugin.execute:Function]
# @PURPOSE: Executes the dataset documentation task.
# @PURPOSE: Executes the dataset documentation task with TaskContext support.
# @PARAM: params (Dict[str, Any]) - Documentation parameters.
# @PARAM: context (Optional[TaskContext]) - Task context for logging with source attribution.
# @PRE: params contains dataset_id, environment_id, and provider_id.
# @POST: Returns generated documentation and updates the dataset in Superset.
# @SIDE_EFFECT: Calls LLM API and updates dataset metadata in Superset.
async def execute(self, params: Dict[str, Any]):
async def execute(self, params: Dict[str, Any], context: Optional[TaskContext] = None):
with belief_scope("execute", f"plugin_id={self.id}"):
logger.info(f"Executing {self.name} with params: {params}")
# Use TaskContext logger if available, otherwise fall back to app logger
log = context.logger if context else logger
# Create sub-loggers for different components
llm_log = log.with_source("llm") if context else log
superset_log = log.with_source("superset_api") if context else log
log.info(f"Executing {self.name} with params: {params}")
dataset_id = params.get("dataset_id")
env_id = params.get("environment_id")
@@ -273,25 +289,25 @@ class DocumentationPlugin(PluginBase):
config_mgr = get_config_manager()
env = config_mgr.get_environment(env_id)
if not env:
log.error(f"Environment {env_id} not found")
raise ValueError(f"Environment {env_id} not found")
# 2. Get LLM Provider
llm_service = LLMProviderService(db)
db_provider = llm_service.get_provider(provider_id)
if not db_provider:
log.error(f"LLM Provider {provider_id} not found")
raise ValueError(f"LLM Provider {provider_id} not found")
logger.info(f"[DocumentationPlugin.execute] Retrieved provider config:")
logger.info(f"[DocumentationPlugin.execute] Provider ID: {db_provider.id}")
logger.info(f"[DocumentationPlugin.execute] Provider Name: {db_provider.name}")
logger.info(f"[DocumentationPlugin.execute] Provider Type: {db_provider.provider_type}")
logger.info(f"[DocumentationPlugin.execute] Base URL: {db_provider.base_url}")
logger.info(f"[DocumentationPlugin.execute] Default Model: {db_provider.default_model}")
logger.info(f"[DocumentationPlugin.execute] Is Active: {db_provider.is_active}")
llm_log.debug(f"Retrieved provider config:")
llm_log.debug(f" Provider ID: {db_provider.id}")
llm_log.debug(f" Provider Name: {db_provider.name}")
llm_log.debug(f" Provider Type: {db_provider.provider_type}")
llm_log.debug(f" Base URL: {db_provider.base_url}")
llm_log.debug(f" Default Model: {db_provider.default_model}")
api_key = llm_service.get_decrypted_api_key(provider_id)
logger.info(f"[DocumentationPlugin.execute] API Key decrypted (first 8 chars): {api_key[:8] if api_key and len(api_key) > 8 else 'EMPTY_OR_NONE'}...")
logger.info(f"[DocumentationPlugin.execute] API Key Length: {len(api_key) if api_key else 0}")
llm_log.debug(f"API Key decrypted (first 8 chars): {api_key[:8] if api_key and len(api_key) > 8 else 'EMPTY_OR_NONE'}...")
# Check if API key was successfully decrypted
if not api_key:
@@ -305,10 +321,8 @@ class DocumentationPlugin(PluginBase):
from ...core.superset_client import SupersetClient
client = SupersetClient(env)
# Optimistic locking check (T045)
superset_log.debug(f"Fetching dataset {dataset_id}")
dataset = client.get_dataset(int(dataset_id))
# dataset structure might vary, ensure we get the right field
original_changed_on = dataset.get("changed_on_utc") or dataset.get("result", {}).get("changed_on_utc")
# Extract columns and existing descriptions
columns_data = []
@@ -318,6 +332,7 @@ class DocumentationPlugin(PluginBase):
"type": col.get("type"),
"description": col.get("description")
})
superset_log.debug(f"Extracted {len(columns_data)} columns from dataset")
# 4. Construct Prompt & Analyze (US2 / T025)
llm_client = LLMClient(
@@ -345,12 +360,10 @@ class DocumentationPlugin(PluginBase):
"""
# Using a generic chat completion for text-only US2
# We use the shared get_json_completion method from LLMClient
llm_log.info(f"Generating documentation for dataset {dataset_id}")
doc_result = await llm_client.get_json_completion([{"role": "user", "content": prompt}])
# 5. Update Metadata (US2 / T026)
# This part normally goes to mapping_service, but we implement the logic here for the plugin flow
# We'll update the dataset in Superset
update_payload = {
"description": doc_result["dataset_description"],
"columns": []
@@ -365,8 +378,11 @@ class DocumentationPlugin(PluginBase):
"description": col_doc["description"]
})
superset_log.info(f"Updating dataset {dataset_id} with generated documentation")
client.update_dataset(int(dataset_id), update_payload)
log.info(f"Documentation completed for dataset {dataset_id}")
return doc_result
finally:

View File

@@ -39,6 +39,7 @@ def schedule_dashboard_validation(dashboard_id: str, cron_expression: str, param
**_parse_cron(cron_expression)
)
logger.info(f"Scheduled validation for dashboard {dashboard_id} with cron {cron_expression}")
# [/DEF:schedule_dashboard_validation:Function]
# [DEF:_parse_cron:Function]
# @PURPOSE: Basic cron parser placeholder.
@@ -56,5 +57,6 @@ def _parse_cron(cron: str) -> Dict[str, str]:
"month": parts[3],
"day_of_week": parts[4]
}
# [/DEF:_parse_cron:Function]
# [/DEF:backend/src/plugins/llm_analysis/scheduler.py:Module]