Seems to work

commit 252a8601a9
parent 8044f85ea4
2026-01-30 11:10:16 +03:00
43 changed files with 1987 additions and 270 deletions

backend/src/plugins/llm_analysis/__init__.py

@@ -0,0 +1,11 @@
# [DEF:backend/src/plugins/llm_analysis/__init__.py:Module]
# @TIER: TRIVIAL
# @PURPOSE: Initialize the LLM Analysis plugin package.
"""
LLM Analysis Plugin for automated dashboard validation and dataset documentation.
"""
from .plugin import DashboardValidationPlugin, DocumentationPlugin
# [/DEF:backend/src/plugins/llm_analysis/__init__.py]

backend/src/plugins/llm_analysis/models.py

@@ -0,0 +1,61 @@
# [DEF:backend/src/plugins/llm_analysis/models.py:Module]
# @TIER: STANDARD
# @SEMANTICS: pydantic, models, llm
# @PURPOSE: Define Pydantic models for LLM Analysis plugin.
# @LAYER: Domain
from typing import List, Optional
from pydantic import BaseModel, Field
from datetime import datetime
from enum import Enum
# [DEF:LLMProviderType:Class]
# @PURPOSE: Enum for supported LLM providers.
class LLMProviderType(str, Enum):
OPENAI = "openai"
OPENROUTER = "openrouter"
KILO = "kilo"
# [/DEF:LLMProviderType:Class]
# [DEF:LLMProviderConfig:Class]
# @PURPOSE: Configuration for an LLM provider.
class LLMProviderConfig(BaseModel):
id: Optional[str] = None
provider_type: LLMProviderType
name: str
base_url: str
api_key: str
default_model: str
is_active: bool = True
# [/DEF:LLMProviderConfig:Class]
# [DEF:ValidationStatus:Class]
# @PURPOSE: Enum for dashboard validation status.
class ValidationStatus(str, Enum):
PASS = "PASS"
WARN = "WARN"
FAIL = "FAIL"
# [/DEF:ValidationStatus:Class]
# [DEF:DetectedIssue:Class]
# @PURPOSE: Model for a single issue detected during validation.
class DetectedIssue(BaseModel):
severity: ValidationStatus
message: str
location: Optional[str] = None
# [/DEF:DetectedIssue:Class]
# [DEF:ValidationResult:Class]
# @PURPOSE: Model for dashboard validation result.
class ValidationResult(BaseModel):
id: Optional[str] = None
dashboard_id: str
timestamp: datetime = Field(default_factory=datetime.utcnow)
status: ValidationStatus
screenshot_path: Optional[str] = None
issues: List[DetectedIssue]
summary: str
raw_response: Optional[str] = None
# [/DEF:ValidationResult:Class]
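# Illustrative construction of a validation result (values are made up):
# ValidationResult(
#     dashboard_id="42",
#     status=ValidationStatus.WARN,
#     summary="One chart rendered empty",
#     issues=[DetectedIssue(severity=ValidationStatus.WARN,
#                           message="Chart shows no data",
#                           location="Sales by Region")],
# )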
# [/DEF:backend/src/plugins/llm_analysis/models.py]

backend/src/plugins/llm_analysis/plugin.py

@@ -0,0 +1,272 @@
# [DEF:backend.src.plugins.llm_analysis.plugin:Module]
# @TIER: STANDARD
# @SEMANTICS: plugin, llm, analysis, documentation
# @PURPOSE: Implements DashboardValidationPlugin and DocumentationPlugin.
# @LAYER: Domain
# @RELATION: INHERITS_FROM -> backend.src.core.plugin_base.PluginBase
from typing import Dict, Any, Optional, List
import os
from datetime import datetime
from ...core.plugin_base import PluginBase
from ...core.logger import belief_scope, logger
from ...core.database import SessionLocal
from ...services.llm_provider import LLMProviderService
from .service import ScreenshotService, LLMClient
from .models import LLMProviderType, ValidationStatus, ValidationResult, DetectedIssue
from ...models.llm import ValidationRecord
# [DEF:DashboardValidationPlugin:Class]
# @PURPOSE: Plugin for automated dashboard health analysis using LLMs.
class DashboardValidationPlugin(PluginBase):
@property
def id(self) -> str:
return "llm_dashboard_validation"
@property
def name(self) -> str:
return "Dashboard LLM Validation"
@property
def description(self) -> str:
return "Automated dashboard health analysis using multimodal LLMs."
@property
def version(self) -> str:
return "1.0.0"
def get_schema(self) -> Dict[str, Any]:
return {
"type": "object",
"properties": {
"dashboard_id": {"type": "string", "title": "Dashboard ID"},
"environment_id": {"type": "string", "title": "Environment ID"},
"provider_id": {"type": "string", "title": "LLM Provider ID"}
},
"required": ["dashboard_id", "environment_id", "provider_id"]
}
async def execute(self, params: Dict[str, Any]):
with belief_scope("execute", f"plugin_id={self.id}"):
logger.info(f"Executing {self.name} with params: {params}")
dashboard_id = params.get("dashboard_id")
env_id = params.get("environment_id")
provider_id = params.get("provider_id")
task_id = params.get("_task_id")
db = SessionLocal()
try:
# 1. Get Environment
from ...dependencies import get_config_manager
config_mgr = get_config_manager()
env = config_mgr.get_environment(env_id)
if not env:
raise ValueError(f"Environment {env_id} not found")
# 2. Get LLM Provider
llm_service = LLMProviderService(db)
db_provider = llm_service.get_provider(provider_id)
if not db_provider:
raise ValueError(f"LLM Provider {provider_id} not found")
api_key = llm_service.get_decrypted_api_key(provider_id)
# 3. Capture Screenshot
screenshot_service = ScreenshotService(env)
os.makedirs("ss-tools-storage/screenshots", exist_ok=True)
screenshot_path = f"ss-tools-storage/screenshots/{dashboard_id}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.png"
await screenshot_service.capture_dashboard(dashboard_id, screenshot_path)
# 4. Fetch Logs (Last 100 lines from backend.log)
logs = []
log_file = "backend.log"
if os.path.exists(log_file):
with open(log_file, "r") as f:
# Read last 100 lines
all_lines = f.readlines()
logs = all_lines[-100:]
if not logs:
logs = ["No logs found in backend.log"]
# 5. Analyze with LLM
llm_client = LLMClient(
provider_type=LLMProviderType(db_provider.provider_type),
api_key=api_key,
base_url=db_provider.base_url,
default_model=db_provider.default_model
)
analysis = await llm_client.analyze_dashboard(screenshot_path, logs)
# 6. Persist Result
validation_result = ValidationResult(
dashboard_id=dashboard_id,
status=ValidationStatus(analysis["status"]),
summary=analysis["summary"],
issues=[DetectedIssue(**issue) for issue in analysis["issues"]],
screenshot_path=screenshot_path,
raw_response=str(analysis)
)
db_record = ValidationRecord(
dashboard_id=validation_result.dashboard_id,
status=validation_result.status.value,
summary=validation_result.summary,
issues=[issue.dict() for issue in validation_result.issues],
screenshot_path=validation_result.screenshot_path,
raw_response=validation_result.raw_response
)
db.add(db_record)
db.commit()
# 7. Notification on failure (US1 / FR-015)
if validation_result.status == ValidationStatus.FAIL:
logger.warning(f"Dashboard {dashboard_id} validation FAILED. Summary: {validation_result.summary}")
# Placeholder for Email/Pulse notification dispatch
# In a real implementation, we would call a NotificationService here
# with a payload containing the summary and a link to the report.
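# A hedged sketch of that dispatch; NotificationService, its
# constructor, and send() are assumptions, not existing APIs in
# this codebase:
# notifier = NotificationService(db)
# await notifier.send(
#     subject=f"Dashboard {dashboard_id} validation FAILED",
#     body=validation_result.summary,
#     link=f"/validations/{db_record.id}",
# )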
return validation_result.dict()
finally:
db.close()
# [/DEF:DashboardValidationPlugin:Class]
# [DEF:DocumentationPlugin:Class]
# @PURPOSE: Plugin for automated dataset documentation using LLMs.
class DocumentationPlugin(PluginBase):
@property
def id(self) -> str:
return "llm_documentation"
@property
def name(self) -> str:
return "Dataset LLM Documentation"
@property
def description(self) -> str:
return "Automated dataset and column documentation using LLMs."
@property
def version(self) -> str:
return "1.0.0"
def get_schema(self) -> Dict[str, Any]:
return {
"type": "object",
"properties": {
"dataset_id": {"type": "string", "title": "Dataset ID"},
"environment_id": {"type": "string", "title": "Environment ID"},
"provider_id": {"type": "string", "title": "LLM Provider ID"}
},
"required": ["dataset_id", "environment_id", "provider_id"]
}
async def execute(self, params: Dict[str, Any]):
with belief_scope("execute", f"plugin_id={self.id}"):
logger.info(f"Executing {self.name} with params: {params}")
dataset_id = params.get("dataset_id")
env_id = params.get("environment_id")
provider_id = params.get("provider_id")
db = SessionLocal()
try:
# 1. Get Environment
from ...dependencies import get_config_manager
config_mgr = get_config_manager()
env = config_mgr.get_environment(env_id)
if not env:
raise ValueError(f"Environment {env_id} not found")
# 2. Get LLM Provider
llm_service = LLMProviderService(db)
db_provider = llm_service.get_provider(provider_id)
if not db_provider:
raise ValueError(f"LLM Provider {provider_id} not found")
api_key = llm_service.get_decrypted_api_key(provider_id)
# 3. Fetch Metadata (US2 / T024)
from ...core.superset_client import SupersetClient
client = SupersetClient(env)
# Optimistic locking (T045): capture the dataset's current modification
# timestamp so a concurrent edit can be detected before writing back.
dataset = client.get_dataset(int(dataset_id))
# The response shape varies between Superset versions, so check both fields.
original_changed_on = dataset.get("changed_on_utc") or dataset.get("result", {}).get("changed_on_utc")
# Extract columns and existing descriptions
columns_data = []
for col in dataset.get("columns", []):
columns_data.append({
"name": col.get("column_name"),
"type": col.get("type"),
"description": col.get("description")
})
# 4. Construct Prompt & Analyze (US2 / T025)
llm_client = LLMClient(
provider_type=LLMProviderType(db_provider.provider_type),
api_key=api_key,
base_url=db_provider.base_url,
default_model=db_provider.default_model
)
prompt = f"""
Generate professional documentation for the following dataset and its columns.
Dataset: {dataset.get('table_name')}
Columns: {columns_data}
Provide the documentation in JSON format:
{{
"dataset_description": "General description of the dataset",
"column_descriptions": [
{{
"name": "column_name",
"description": "Generated description"
}}
]
}}
"""
# Using a generic chat completion for text-only US2
response = await llm_client.client.chat.completions.create(
model=db_provider.default_model,
messages=[{"role": "user", "content": prompt}],
response_format={"type": "json_object"}
)
import json
doc_result = json.loads(response.choices[0].message.content)
# 5. Update Metadata (US2 / T026)
# Long-term this belongs in mapping_service; it is inlined here to keep
# the plugin flow self-contained. Write the descriptions back to Superset.
update_payload = {
"description": doc_result["dataset_description"],
"columns": []
}
# Map generated descriptions back to column IDs
for col_doc in doc_result["column_descriptions"]:
for col in dataset.get("columns", []):
if col.get("column_name") == col_doc["name"]:
update_payload["columns"].append({
"id": col.get("id"),
"description": col_doc["description"]
})
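# A hedged sketch of the optimistic-locking check implied above (T045);
# not wired in yet, and the cost of the re-fetch is untested:
# latest = client.get_dataset(int(dataset_id))
# latest_changed_on = latest.get("changed_on_utc") or latest.get("result", {}).get("changed_on_utc")
# if latest_changed_on != original_changed_on:
#     raise RuntimeError(f"Dataset {dataset_id} changed since it was read; aborting update")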
client.update_dataset(int(dataset_id), update_payload)
return doc_result
finally:
db.close()
# [/DEF:DocumentationPlugin:Class]
# [/DEF:backend.src.plugins.llm_analysis.plugin:Module]

backend/src/plugins/llm_analysis/scheduler.py

@@ -0,0 +1,56 @@
# [DEF:backend/src/plugins/llm_analysis/scheduler.py:Module]
# @TIER: STANDARD
# @SEMANTICS: scheduler, task, automation
# @PURPOSE: Provides helper functions to schedule LLM-based validation tasks.
# @LAYER: Domain
# @RELATION: DEPENDS_ON -> backend.src.core.scheduler
from typing import Dict, Any
from ...dependencies import get_task_manager, get_scheduler_service
from ...core.logger import belief_scope, logger
# [DEF:schedule_dashboard_validation:Function]
# @PURPOSE: Schedules a recurring dashboard validation task.
# @PARAM: dashboard_id (str) - ID of the dashboard to validate.
# @PARAM: cron_expression (str) - Standard cron expression for scheduling.
# @PARAM: params (Dict[str, Any]) - Task parameters (environment_id, provider_id).
def schedule_dashboard_validation(dashboard_id: str, cron_expression: str, params: Dict[str, Any]):
with belief_scope("schedule_dashboard_validation", f"dashboard_id={dashboard_id}"):
scheduler = get_scheduler_service()
task_manager = get_task_manager()
job_id = f"llm_val_{dashboard_id}"
async def job_func():
await task_manager.create_task(
plugin_id="llm_dashboard_validation",
params={
"dashboard_id": dashboard_id,
**params
}
)
scheduler.add_job(
job_func,
"cron",
id=job_id,
replace_existing=True,
**_parse_cron(cron_expression)
)
logger.info(f"Scheduled validation for dashboard {dashboard_id} with cron {cron_expression}")
def _parse_cron(cron: str) -> Dict[str, str]:
# Minimal 5-field cron parser: minute hour day month day_of_week.
parts = cron.split()
if len(parts) != 5:
raise ValueError(f"Invalid cron expression: {cron!r} (expected 5 fields)")
return {
"minute": parts[0],
"hour": parts[1],
"day": parts[2],
"month": parts[3],
"day_of_week": parts[4]
}
# [/DEF:schedule_dashboard_validation:Function]
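# Example wiring (illustrative; the IDs and cron string are assumptions):
# schedule_dashboard_validation(
#     dashboard_id="42",
#     cron_expression="0 6 * * 1-5",  # weekdays at 06:00
#     params={"environment_id": "prod", "provider_id": "openrouter-default"},
# )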
# [/DEF:backend/src/plugins/llm_analysis/scheduler.py]

backend/src/plugins/llm_analysis/service.py

@@ -0,0 +1,224 @@
# [DEF:backend.src.plugins.llm_analysis.service:Module]
# @TIER: STANDARD
# @SEMANTICS: service, llm, screenshot, playwright, openai
# @PURPOSE: Services for LLM interaction and dashboard screenshots.
# @LAYER: Domain
# @RELATION: DEPENDS_ON -> playwright
# @RELATION: DEPENDS_ON -> openai
# @RELATION: DEPENDS_ON -> tenacity
import asyncio
from typing import List, Optional, Dict, Any
from playwright.async_api import async_playwright
from openai import AsyncOpenAI, RateLimitError
from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type
from .models import LLMProviderType, ValidationResult, ValidationStatus, DetectedIssue
from ...core.logger import belief_scope, logger
from ...core.config_models import Environment
# [DEF:ScreenshotService:Class]
# @PURPOSE: Handles capturing screenshots of Superset dashboards.
class ScreenshotService:
# @PRE: env is a valid Environment object.
def __init__(self, env: Environment):
self.env = env
# [DEF:capture_dashboard:Function]
# @PURPOSE: Captures a screenshot of a dashboard using Playwright.
# @PARAM: dashboard_id (str) - ID of the dashboard.
# @PARAM: output_path (str) - Path to save the screenshot.
# @RETURN: bool - True if successful.
async def capture_dashboard(self, dashboard_id: str, output_path: str) -> bool:
with belief_scope("capture_dashboard", f"dashboard_id={dashboard_id}"):
logger.info(f"Capturing screenshot for dashboard {dashboard_id}")
async with async_playwright() as p:
browser = await p.chromium.launch(headless=True)
context = await browser.new_context(viewport={'width': 1280, 'height': 720})
page = await context.new_page()
# 1. Authenticate via API to get tokens
from ...core.superset_client import SupersetClient
client = SupersetClient(self.env)
try:
tokens = client.authenticate()
access_token = tokens.get("access_token")
# Superset's UI authenticates with session cookies rather than the JWT,
# so the access token cannot simply be injected into the browser session.
# The UI login below remains the reliable path for Playwright; sending
# the token as an Authorization header is a possible alternative.
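# A hedged sketch of that alternative (set_extra_http_headers is standard
# Playwright API; whether Superset honours a Bearer token for page loads
# is an assumption):
# await context.set_extra_http_headers({"Authorization": f"Bearer {access_token}"})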
logger.info("API Authentication successful")
except Exception as e:
logger.warning(f"API Authentication failed: {e}. Falling back to UI login.")
# 2. Navigate to dashboard
dashboard_url = f"{self.env.url}/superset/dashboard/{dashboard_id}/"
logger.info(f"Navigating to {dashboard_url}")
await page.goto(dashboard_url)
await page.wait_for_load_state("networkidle")
# 3. Check if we are redirected to login
if "/login" in page.url:
logger.info(f"Redirected to login: {page.url}. Filling credentials from Environment.")
# More exhaustive list of selectors for various Superset versions/themes
selectors = {
"username": ['input[name="username"]', 'input#username', 'input[placeholder*="Username"]'],
"password": ['input[name="password"]', 'input#password', 'input[placeholder*="Password"]'],
"submit": ['button[type="submit"]', 'button#submit', '.btn-primary']
}
try:
# Find and fill username
u_selector = None
for s in selectors["username"]:
if await page.locator(s).count() > 0:
u_selector = s
break
if not u_selector:
raise RuntimeError("Could not find username input field")
await page.fill(u_selector, self.env.username)
# Find and fill password
p_selector = None
for s in selectors["password"]:
if await page.locator(s).count() > 0:
p_selector = s
break
if not p_selector:
raise RuntimeError("Could not find password input field")
await page.fill(p_selector, self.env.password)
# Click submit
s_selector = selectors["submit"][0]
for s in selectors["submit"]:
if await page.locator(s).count() > 0:
s_selector = s
break
await page.click(s_selector)
await page.wait_for_load_state("networkidle")
# Re-verify we are at the dashboard
if "/login" in page.url:
# Check for error messages on page
error_locator = page.locator(".alert-danger, .error-message")
error_msg = await error_locator.text_content() if await error_locator.count() > 0 else "Unknown error"
raise RuntimeError(f"Login failed after submission: {error_msg}")
if "/superset/dashboard" not in page.url:
logger.info(f"Redirecting back to dashboard after login: {dashboard_url}")
await page.goto(dashboard_url)
await page.wait_for_load_state("networkidle")
except Exception as e:
page_title = await page.title()
logger.error(f"UI Login failed. Page title: {page_title}, URL: {page.url}, Error: {str(e)}")
debug_path = output_path.replace(".png", "_debug_failed_login.png")
await page.screenshot(path=debug_path)
raise RuntimeError(f"Login failed: {str(e)}. Debug screenshot saved to {debug_path}")
# Wait a bit more for charts to render
await asyncio.sleep(5)
await page.screenshot(path=output_path, full_page=True)
await browser.close()
logger.info(f"Screenshot saved to {output_path}")
return True
# [/DEF:ScreenshotService:Class]
# [DEF:LLMClient:Class]
# @PURPOSE: Wrapper for LLM provider APIs.
class LLMClient:
def __init__(self, provider_type: LLMProviderType, api_key: str, base_url: str, default_model: str):
self.provider_type = provider_type
self.api_key = api_key
self.base_url = base_url
self.default_model = default_model
self.client = AsyncOpenAI(api_key=api_key, base_url=base_url)
# [DEF:analyze_dashboard:Function]
# @PURPOSE: Sends dashboard data to LLM for analysis.
@retry(
stop=stop_after_attempt(5),
wait=wait_exponential(multiplier=2, min=5, max=60),
retry=retry_if_exception_type(RateLimitError)  # only rate limits are retried; other errors propagate
)
async def analyze_dashboard(self, screenshot_path: str, logs: List[str]) -> Dict[str, Any]:
with belief_scope("analyze_dashboard"):
import base64
with open(screenshot_path, "rb") as image_file:
base64_image = base64.b64encode(image_file.read()).decode('utf-8')
log_text = "\n".join(logs)
prompt = f"""
Analyze the attached dashboard screenshot and the following execution logs for health and visual issues.
Logs:
{log_text}
Provide the analysis in JSON format with the following structure:
{{
"status": "PASS" | "WARN" | "FAIL",
"summary": "Short summary of findings",
"issues": [
{{
"severity": "WARN" | "FAIL",
"message": "Description of the issue",
"location": "Optional location info (e.g. chart name)"
}}
]
}}
"""
logger.debug(f"[analyze_dashboard] Calling LLM with model: {self.default_model}")
try:
response = await self.client.chat.completions.create(
model=self.default_model,
messages=[
{
"role": "user",
"content": [
{"type": "text", "text": prompt},
{
"type": "image_url",
"image_url": {
"url": f"data:image/jpeg;base64,{base64_image}"
}
}
]
}
],
response_format={"type": "json_object"}
)
logger.debug(f"[analyze_dashboard] LLM Response: {response}")
except RateLimitError as e:
logger.warning(f"[analyze_dashboard] Rate limit hit: {str(e)}")
raise # tenacity will handle retry
except Exception as e:
logger.error(f"[analyze_dashboard] LLM call failed: {str(e)}")
raise
if not response or not hasattr(response, 'choices') or not response.choices:
error_info = getattr(response, 'error', 'No choices in response')
logger.error(f"[analyze_dashboard] Invalid LLM response. Error info: {error_info}")
return {
"status": "FAIL",
"summary": f"Failed to get response from LLM: {error_info}",
"issues": [{"severity": "FAIL", "message": "LLM provider returned empty or invalid response"}]
}
import json
result = json.loads(response.choices[0].message.content)
return result
# [/DEF:analyze_dashboard:Function]
# [/DEF:LLMClient:Class]
# [/DEF:backend.src.plugins.llm_analysis.service:Module]