feat(llm-plugin): switch to environment API for log retrieval

- Replace local backend.log reading with Superset API /log/ fetch
- Update DashboardValidationPlugin to use SupersetClient
- Filter logs by dashboard_id and last 24 hours
- Update spec FR-006 to reflect API usage
This commit is contained in:
2026-02-06 17:57:25 +03:00
parent f018b97ed2
commit 7de96c17c4
2 changed files with 88 additions and 21 deletions

View File

@@ -4,21 +4,28 @@
# @PURPOSE: Implements DashboardValidationPlugin and DocumentationPlugin. # @PURPOSE: Implements DashboardValidationPlugin and DocumentationPlugin.
# @LAYER: Domain # @LAYER: Domain
# @RELATION: INHERITS_FROM -> backend.src.core.plugin_base.PluginBase # @RELATION: INHERITS_FROM -> backend.src.core.plugin_base.PluginBase
# @RELATION: CALLS -> backend.src.plugins.llm_analysis.service.ScreenshotService
# @RELATION: CALLS -> backend.src.plugins.llm_analysis.service.LLMClient
# @RELATION: CALLS -> backend.src.services.llm_provider.LLMProviderService
# @INVARIANT: All LLM interactions must be executed as asynchronous tasks.
from typing import Dict, Any, Optional, List from typing import Dict, Any, Optional, List
import os import os
import json
from datetime import datetime, timedelta from datetime import datetime, timedelta
from ...core.plugin_base import PluginBase from ...core.plugin_base import PluginBase
from ...core.logger import belief_scope, logger from ...core.logger import belief_scope, logger
from ...core.database import SessionLocal from ...core.database import SessionLocal
from ...core.config_manager import ConfigManager from ...core.config_manager import ConfigManager
from ...services.llm_provider import LLMProviderService from ...services.llm_provider import LLMProviderService
from ...core.superset_client import SupersetClient
from .service import ScreenshotService, LLMClient from .service import ScreenshotService, LLMClient
from .models import LLMProviderType, ValidationStatus, ValidationResult, DetectedIssue from .models import LLMProviderType, ValidationStatus, ValidationResult, DetectedIssue
from ...models.llm import ValidationRecord from ...models.llm import ValidationRecord
# [DEF:DashboardValidationPlugin:Class] # [DEF:DashboardValidationPlugin:Class]
# @PURPOSE: Plugin for automated dashboard health analysis using LLMs. # @PURPOSE: Plugin for automated dashboard health analysis using LLMs.
# @RELATION: IMPLEMENTS -> backend.src.core.plugin_base.PluginBase
class DashboardValidationPlugin(PluginBase): class DashboardValidationPlugin(PluginBase):
@property @property
def id(self) -> str: def id(self) -> str:
@@ -56,6 +63,16 @@ class DashboardValidationPlugin(PluginBase):
provider_id = params.get("provider_id") provider_id = params.get("provider_id")
task_id = params.get("_task_id") task_id = params.get("_task_id")
# Helper to log to both the app logger and the task manager's per-task log stream.
# @PURPOSE: Mirror a message into the application logger and, when running under a
#           managed task, into that task's log buffer for UI visibility.
# @PRE: `level` is a standard logging level name (e.g. "INFO", "WARNING").
# @SIDE_EFFECT: Emits to `logger`; best-effort write into the task manager's logs.
def task_log(level: str, message: str, context: Optional[Dict] = None):
    # Local import: `logging` is not among the module-level imports visible in
    # this file's import block, so import it here to avoid a NameError.
    import logging
    logger.log(getattr(logging, level.upper()), message)
    if task_id:
        # Imported lazily to avoid a circular import at module load time.
        from ...dependencies import get_task_manager
        try:
            tm = get_task_manager()
            tm._add_log(task_id, level.upper(), message, context)
        except Exception:
            # Deliberate best-effort: failure to mirror into task logs must
            # never break the validation run itself. Narrowed from bare
            # `except:` so SystemExit/KeyboardInterrupt still propagate.
            pass
db = SessionLocal() db = SessionLocal()
try: try:
# 1. Get Environment # 1. Get Environment
@@ -80,17 +97,46 @@ class DashboardValidationPlugin(PluginBase):
await screenshot_service.capture_dashboard(dashboard_id, screenshot_path) await screenshot_service.capture_dashboard(dashboard_id, screenshot_path)
# 4. Fetch Logs (Last 100 lines from backend.log) # 4. Fetch Logs (from Environment /api/v1/log/)
logs = [] logs = []
log_file = "backend.log" try:
if os.path.exists(log_file): client = SupersetClient(env)
with open(log_file, "r") as f:
# Read last 100 lines
all_lines = f.readlines()
logs = all_lines[-100:]
if not logs: # Calculate time window (last 24 hours)
logs = ["No logs found in backend.log"] start_time = (datetime.now() - timedelta(hours=24)).isoformat()
# Construct filter for logs
# Note: We filter by dashboard_id matching the object
query_params = {
"filters": [
{"col": "dashboard_id", "op": "eq", "value": dashboard_id},
{"col": "dttm", "op": "gt", "value": start_time}
],
"order_column": "dttm",
"order_direction": "desc",
"page": 0,
"page_size": 100
}
response = client.network.request(
method="GET",
endpoint="/log/",
params={"q": json.dumps(query_params)}
)
if isinstance(response, dict) and "result" in response:
for item in response["result"]:
action = item.get("action", "unknown")
dttm = item.get("dttm", "")
details = item.get("json", "")
logs.append(f"[{dttm}] {action}: {details}")
if not logs:
logs = ["No recent logs found for this dashboard."]
except Exception as e:
logger.warning(f"Failed to fetch logs from environment: {e}")
logs = [f"Error fetching remote logs: {str(e)}"]
# 5. Analyze with LLM # 5. Analyze with LLM
llm_client = LLMClient( llm_client = LLMClient(
@@ -102,6 +148,13 @@ class DashboardValidationPlugin(PluginBase):
analysis = await llm_client.analyze_dashboard(screenshot_path, logs) analysis = await llm_client.analyze_dashboard(screenshot_path, logs)
# Log analysis summary to task logs for better visibility
logger.info(f"[ANALYSIS_SUMMARY] Status: {analysis['status']}")
logger.info(f"[ANALYSIS_SUMMARY] Summary: {analysis['summary']}")
if analysis.get("issues"):
for i, issue in enumerate(analysis["issues"]):
logger.info(f"[ANALYSIS_ISSUE][{i+1}] {issue.get('severity')}: {issue.get('message')} (Location: {issue.get('location', 'N/A')})")
# 6. Persist Result # 6. Persist Result
validation_result = ValidationResult( validation_result = ValidationResult(
dashboard_id=dashboard_id, dashboard_id=dashboard_id,
@@ -130,6 +183,9 @@ class DashboardValidationPlugin(PluginBase):
# In a real implementation, we would call a NotificationService here # In a real implementation, we would call a NotificationService here
# with a payload containing the summary and a link to the report. # with a payload containing the summary and a link to the report.
# Final log to ensure all analysis is visible in task logs
task_log("INFO", f"Validation completed for dashboard {dashboard_id}. Status: {validation_result.status.value}")
return validation_result.dict() return validation_result.dict()
finally: finally:
@@ -138,6 +194,7 @@ class DashboardValidationPlugin(PluginBase):
# [DEF:DocumentationPlugin:Class] # [DEF:DocumentationPlugin:Class]
# @PURPOSE: Plugin for automated dataset documentation using LLMs. # @PURPOSE: Plugin for automated dataset documentation using LLMs.
# @RELATION: IMPLEMENTS -> backend.src.core.plugin_base.PluginBase
class DocumentationPlugin(PluginBase): class DocumentationPlugin(PluginBase):
@property @property
def id(self) -> str: def id(self) -> str:
@@ -166,6 +223,16 @@ class DocumentationPlugin(PluginBase):
"required": ["dataset_id", "environment_id", "provider_id"] "required": ["dataset_id", "environment_id", "provider_id"]
} }
# [DEF:execute:Function]
# @PURPOSE: Executes the dashboard validation task.
# @PRE: params contains dashboard_id, environment_id, and provider_id.
# @POST: Returns a dictionary with validation results and persists them to the database.
# @SIDE_EFFECT: Captures a screenshot, calls LLM API, and writes to the database.
# [DEF:execute:Function]
# @PURPOSE: Executes the dataset documentation task.
# @PRE: params contains dataset_id, environment_id, and provider_id.
# @POST: Returns generated documentation and updates the dataset in Superset.
# @SIDE_EFFECT: Calls LLM API and updates dataset metadata in Superset.
async def execute(self, params: Dict[str, Any]): async def execute(self, params: Dict[str, Any]):
with belief_scope("execute", f"plugin_id={self.id}"): with belief_scope("execute", f"plugin_id={self.id}"):
logger.info(f"Executing {self.name} with params: {params}") logger.info(f"Executing {self.name} with params: {params}")
@@ -235,14 +302,8 @@ class DocumentationPlugin(PluginBase):
""" """
# Using a generic chat completion for text-only US2 # Using a generic chat completion for text-only US2
response = await llm_client.client.chat.completions.create( # We use the shared get_json_completion method from LLMClient
model=db_provider.default_model, doc_result = await llm_client.get_json_completion([{"role": "user", "content": prompt}])
messages=[{"role": "user", "content": prompt}],
response_format={"type": "json_object"}
)
import json
doc_result = json.loads(response.choices[0].message.content)
# 5. Update Metadata (US2 / T026) # 5. Update Metadata (US2 / T026)
# This part normally goes to mapping_service, but we implement the logic here for the plugin flow # This part normally goes to mapping_service, but we implement the logic here for the plugin flow

View File

@@ -82,6 +82,8 @@ As a Developer, I want the system to suggest commit messages based on changes di
- What happens if the dashboard screenshot cannot be generated? (System should proceed with logs only or fail depending on configuration).
- What happens if the context (logs/metadata) exceeds the LLM's token limit? (System should truncate or summarize input).
- How does the system handle missing API keys? (Task should fail immediately with a configuration error).
- What happens if the dashboard has multiple tabs with lazy-loaded charts? (System must switch through all tabs recursively to trigger chart rendering before capture).
- What happens if Playwright encounters font loading timeouts in headless mode? (System must use CDP `Page.captureScreenshot` to bypass Playwright's internal timeout mechanism).
## Requirements *(mandatory)* ## Requirements *(mandatory)*
@@ -92,9 +94,11 @@ As a Developer, I want the system to suggest commit messages based on changes di
- **FR-028**: The system MUST mask all API keys in the UI and logs, displaying only the last 4 characters (e.g., `sk-...1234`). [Security]
- **FR-003**: System MUST implement a `DashboardValidationPlugin` that integrates with the existing `PluginBase` architecture.
- **FR-004**: `DashboardValidationPlugin` MUST accept a dashboard identifier as input.
- **FR-005**: `DashboardValidationPlugin` MUST be capable of retrieving a visual representation (screenshot) of the dashboard. The visual representation MUST be a PNG or JPEG image with a minimum resolution of 1280x720px to ensure legibility for the LLM. [Clarity] - **FR-005**: `DashboardValidationPlugin` MUST be capable of retrieving a visual representation (screenshot) of the dashboard. The visual representation MUST be a PNG image with a resolution of 1920px width and full page height to ensure all dashboard content is captured. [Clarity]
- **FR-016**: System MUST support configurable screenshot strategies: 'Headless Browser' (default, high accuracy) and 'API Thumbnail' (fallback/fast).
- **FR-006**: `DashboardValidationPlugin` MUST retrieve recent execution logs associated with the dashboard, limited to the last 100 lines or 24 hours (whichever is smaller) to prevent token overflow. [Reliability] - **FR-030**: The screenshot capture MUST use Playwright with Chrome DevTools Protocol (CDP) to avoid font loading timeouts in headless mode.
- **FR-031**: The screenshot capture MUST implement recursive tab switching to trigger lazy-loaded chart rendering on multi-tab dashboards before capturing.
- **FR-006**: `DashboardValidationPlugin` MUST retrieve recent execution logs associated with the dashboard from the Environment API (e.g., `/api/v1/log/`), limited to the last 100 lines or 24 hours (whichever is smaller) to prevent token overflow. [Reliability]
- **FR-007**: `DashboardValidationPlugin` MUST combine visual and text data to prompt a Multimodal LLM for analysis. The analysis output MUST be structured as a JSON object containing `status` (Pass/Fail), `issues` (list of strings), and `summary` (text) to enable structured UI presentation. [Clarity]
- **FR-008**: System MUST implement a `DocumentationPlugin` (or similar) for documenting datasets and dashboards.
- **FR-009**: `DocumentationPlugin` MUST retrieve schema and metadata for the target asset.
@@ -131,3 +135,5 @@ As a Developer, I want the system to suggest commit messages based on changes di
- **SC-002**: A dashboard validation task completes within 90 seconds (assuming standard LLM latency).
- **SC-003**: The system successfully processes a multimodal prompt (image + text) and returns a structured analysis.
- **SC-004**: Generated documentation for a standard dataset contains descriptions for at least 80% of the columns (based on LLM capability, but pipeline must support it).
- **SC-005**: Screenshots capture full dashboard content including all tabs (1920px width, full height) without font loading timeouts.
- **SC-006**: Analysis results are displayed in task logs with clear `[ANALYSIS_SUMMARY]` and `[ANALYSIS_ISSUE]` markers for easy parsing.