Compare commits

...

2 Commits

Author SHA1 Message Date
0f16bab2b8 Похоже работает 2026-02-07 11:26:06 +03:00
7de96c17c4 feat(llm-plugin): switch to environment API for log retrieval
- Replace local backend.log reading with Superset API /log/ fetch
- Update DashboardValidationPlugin to use SupersetClient
- Filter logs by dashboard_id and last 24 hours
- Update spec FR-006 to reflect API usage
2026-02-06 17:57:25 +03:00
27 changed files with 62660 additions and 58755 deletions

1
backend/get_full_key.py Normal file
View File

@@ -0,0 +1 @@
{"print(f'Length": {"else": "print('Provider not found')\ndb.close()"}}

File diff suppressed because it is too large Load Diff

Binary file not shown.

View File

@@ -143,6 +143,15 @@ async def test_connection(
raise HTTPException(status_code=404, detail="Provider not found")
api_key = service.get_decrypted_api_key(provider_id)
# Check if API key was successfully decrypted
if not api_key:
logger.error(f"[llm_routes][test_connection] Failed to decrypt API key for provider {provider_id}")
raise HTTPException(
status_code=500,
detail="Failed to decrypt API key. The provider may have been encrypted with a different encryption key. Please update the provider with a new API key."
)
client = LLMClient(
provider_type=LLMProviderType(db_provider.provider_type),
api_key=api_key,
@@ -173,6 +182,13 @@ async def test_provider_config(
from ...plugins.llm_analysis.service import LLMClient
logger.info(f"[llm_routes][test_provider_config][Action] Testing config for {config.name}")
# Check if API key is provided
if not config.api_key or config.api_key == "********":
raise HTTPException(
status_code=400,
detail="API key is required for testing connection"
)
client = LLMClient(
provider_type=config.provider_type,
api_key=config.api_key,

View File

@@ -18,6 +18,7 @@ import asyncio
import os
from .dependencies import get_task_manager, get_scheduler_service
from .core.utils.network import NetworkError
from .core.logger import logger, belief_scope
from .api.routes import plugins, tasks, settings, environments, mappings, migration, connections, git, storage, admin, llm
from .api import auth
@@ -77,13 +78,34 @@ app.add_middleware(
# @POST: Logs request and response details.
# @PARAM: request (Request) - The incoming request object.
# @PARAM: call_next (Callable) - The next middleware or route handler.
@app.exception_handler(NetworkError)
async def network_error_handler(request: Request, exc: NetworkError):
    # Global handler for NetworkError raised by route handlers.
    # BUG FIX: an exception handler must RETURN a Response object. The previous
    # code returned an HTTPException *instance*, which FastAPI would attempt to
    # serialize as a response body instead of producing an actual 503 response.
    from fastapi.responses import JSONResponse  # local import keeps this edit self-contained
    with belief_scope("network_error_handler"):
        logger.error(f"Network error: {exc}")
        return JSONResponse(
            status_code=503,
            content={"detail": "Environment unavailable. Please check if the Superset instance is running."},
        )
@app.middleware("http")
async def log_requests(request: Request, call_next):
with belief_scope("log_requests", f"{request.method} {request.url.path}"):
logger.info(f"[DEBUG] Incoming request: {request.method} {request.url.path}")
# Avoid spamming logs for polling endpoints
is_polling = request.url.path.endswith("/api/tasks") and request.method == "GET"
if not is_polling:
logger.info(f"Incoming request: {request.method} {request.url.path}")
try:
response = await call_next(request)
logger.info(f"[DEBUG] Response status: {response.status_code} for {request.url.path}")
if not is_polling:
logger.info(f"Response status: {response.status_code} for {request.url.path}")
return response
except NetworkError as e:
logger.error(f"Network error caught in middleware: {e}")
raise HTTPException(
status_code=503,
detail="Environment unavailable. Please check if the Superset instance is running."
)
# [/DEF:log_requests:Function]
# Include API routes

View File

@@ -36,6 +36,7 @@ class TaskPersistenceService:
# @PRE: isinstance(task, Task)
# @POST: Task record created or updated in database.
# @PARAM: task (Task) - The task object to persist.
# @SIDE_EFFECT: Writes to task_records table in tasks.db
def persist_task(self, task: Task) -> None:
with belief_scope("TaskPersistenceService.persist_task", f"task_id={task.id}"):
session: Session = TasksSessionLocal()
@@ -50,8 +51,19 @@ class TaskPersistenceService:
record.environment_id = task.params.get("environment_id") or task.params.get("source_env_id")
record.started_at = task.started_at
record.finished_at = task.finished_at
record.params = task.params
record.result = task.result
# Ensure params and result are JSON serializable
def json_serializable(obj):
    """Recursively convert any datetime values inside dicts/lists to ISO-8601 strings."""
    # Leaf case first: a datetime becomes its ISO representation.
    if isinstance(obj, datetime):
        return obj.isoformat()
    # Containers are rebuilt with each element converted in turn.
    if isinstance(obj, dict):
        return {key: json_serializable(value) for key, value in obj.items()}
    if isinstance(obj, list):
        return [json_serializable(item) for item in obj]
    # Anything else is assumed to already be JSON-friendly and passes through.
    return obj
record.params = json_serializable(task.params)
record.result = json_serializable(task.result)
# Store logs as JSON, converting datetime to string
record.logs = []
@@ -59,6 +71,9 @@ class TaskPersistenceService:
log_dict = log.dict()
if isinstance(log_dict.get('timestamp'), datetime):
log_dict['timestamp'] = log_dict['timestamp'].isoformat()
# Also clean up any datetimes in context
if log_dict.get('context'):
log_dict['context'] = json_serializable(log_dict['context'])
record.logs.append(log_dict)
# Extract error if failed

View File

@@ -140,7 +140,16 @@ class APIClient:
app_logger.info("[authenticate][Enter] Authenticating to %s", self.base_url)
try:
login_url = f"{self.base_url}/security/login"
# Log the payload keys and values (masking password)
masked_auth = {k: ("******" if k == "password" else v) for k, v in self.auth.items()}
app_logger.info(f"[authenticate][Debug] Login URL: {login_url}")
app_logger.info(f"[authenticate][Debug] Auth payload: {masked_auth}")
response = self.session.post(login_url, json=self.auth, timeout=self.request_settings["timeout"])
if response.status_code != 200:
app_logger.error(f"[authenticate][Error] Status: {response.status_code}, Response: {response.text}")
response.raise_for_status()
access_token = response.json()["access_token"]
@@ -153,6 +162,9 @@ class APIClient:
app_logger.info("[authenticate][Exit] Authenticated successfully.")
return self._tokens
except requests.exceptions.HTTPError as e:
status_code = e.response.status_code if e.response is not None else None
if status_code in [502, 503, 504]:
raise NetworkError(f"Environment unavailable during authentication (Status {status_code})", status_code=status_code) from e
raise AuthenticationError(f"Authentication failed: {e}") from e
except (requests.exceptions.RequestException, KeyError) as e:
raise NetworkError(f"Network or parsing error during authentication: {e}") from e
@@ -209,6 +221,8 @@ class APIClient:
def _handle_http_error(self, e: requests.exceptions.HTTPError, endpoint: str):
with belief_scope("_handle_http_error"):
status_code = e.response.status_code
if status_code == 502 or status_code == 503 or status_code == 504:
raise NetworkError(f"Environment unavailable (Status {status_code})", status_code=status_code) from e
if status_code == 404: raise DashboardNotFoundError(endpoint) from e
if status_code == 403: raise PermissionDeniedError() from e
if status_code == 401: raise AuthenticationError() from e

View File

@@ -35,7 +35,6 @@ init_db()
# @RETURN: ConfigManager - The shared config manager instance.
def get_config_manager() -> ConfigManager:
"""Dependency injector for the ConfigManager."""
with belief_scope("get_config_manager"):
return config_manager
# [/DEF:get_config_manager:Function]
@@ -58,7 +57,6 @@ logger.info("SchedulerService initialized")
# @RETURN: PluginLoader - The shared plugin loader instance.
def get_plugin_loader() -> PluginLoader:
"""Dependency injector for the PluginLoader."""
with belief_scope("get_plugin_loader"):
return plugin_loader
# [/DEF:get_plugin_loader:Function]
@@ -69,7 +67,6 @@ def get_plugin_loader() -> PluginLoader:
# @RETURN: TaskManager - The shared task manager instance.
def get_task_manager() -> TaskManager:
"""Dependency injector for the TaskManager."""
with belief_scope("get_task_manager"):
return task_manager
# [/DEF:get_task_manager:Function]
@@ -80,7 +77,6 @@ def get_task_manager() -> TaskManager:
# @RETURN: SchedulerService - The shared scheduler service instance.
def get_scheduler_service() -> SchedulerService:
"""Dependency injector for the SchedulerService."""
with belief_scope("get_scheduler_service"):
return scheduler_service
# [/DEF:get_scheduler_service:Function]
@@ -98,7 +94,6 @@ oauth2_scheme = OAuth2PasswordBearer(tokenUrl="/api/auth/login")
# @PARAM: db (Session) - Auth database session.
# @RETURN: User - The authenticated user.
def get_current_user(token: str = Depends(oauth2_scheme), db = Depends(get_auth_db)):
with belief_scope("get_current_user"):
credentials_exception = HTTPException(
status_code=status.HTTP_401_UNAUTHORIZED,
detail="Could not validate credentials",
@@ -129,7 +124,6 @@ def get_current_user(token: str = Depends(oauth2_scheme), db = Depends(get_auth_
# @RETURN: User - The authenticated user if permission granted.
def has_permission(resource: str, action: str):
def permission_checker(current_user: User = Depends(get_current_user)):
with belief_scope("has_permission", f"{resource}:{action}"):
# Union of all permissions across all roles
for role in current_user.roles:
for perm in role.permissions:

View File

@@ -1,6 +1,7 @@
# [DEF:backend/src/plugins/llm_analysis/__init__.py:Module]
# @TIER: TRIVIAL
# @PURPOSE: Initialize the LLM Analysis plugin package.
# @LAYER: Domain
"""
LLM Analysis Plugin for automated dashboard validation and dataset documentation.
@@ -8,4 +9,4 @@ LLM Analysis Plugin for automated dashboard validation and dataset documentation
from .plugin import DashboardValidationPlugin, DocumentationPlugin
# [/DEF:backend/src/plugins/llm_analysis/__init__.py]
# [/DEF:backend/src/plugins/llm_analysis/__init__.py:Module]

View File

@@ -24,7 +24,7 @@ class LLMProviderConfig(BaseModel):
provider_type: LLMProviderType
name: str
base_url: str
api_key: str
api_key: Optional[str] = None
default_model: str
is_active: bool = True
# [/DEF:LLMProviderConfig:Class]
@@ -58,4 +58,4 @@ class ValidationResult(BaseModel):
raw_response: Optional[str] = None
# [/DEF:ValidationResult:Class]
# [/DEF:backend/src/plugins/llm_analysis/models.py]
# [/DEF:backend/src/plugins/llm_analysis/models.py:Module]

View File

@@ -1,24 +1,32 @@
# [DEF:backend.src.plugins.llm_analysis.plugin:Module]
# [DEF:backend/src/plugins/llm_analysis/plugin.py:Module]
# @TIER: STANDARD
# @SEMANTICS: plugin, llm, analysis, documentation
# @PURPOSE: Implements DashboardValidationPlugin and DocumentationPlugin.
# @LAYER: Domain
# @RELATION: INHERITS_FROM -> backend.src.core.plugin_base.PluginBase
# @RELATION: INHERITS -> backend.src.core.plugin_base.PluginBase
# @RELATION: CALLS -> backend.src.plugins.llm_analysis.service.ScreenshotService
# @RELATION: CALLS -> backend.src.plugins.llm_analysis.service.LLMClient
# @RELATION: CALLS -> backend.src.services.llm_provider.LLMProviderService
# @INVARIANT: All LLM interactions must be executed as asynchronous tasks.
from typing import Dict, Any, Optional, List
import os
import json
import logging
from datetime import datetime, timedelta
from ...core.plugin_base import PluginBase
from ...core.logger import belief_scope, logger
from ...core.database import SessionLocal
from ...core.config_manager import ConfigManager
from ...services.llm_provider import LLMProviderService
from ...core.superset_client import SupersetClient
from .service import ScreenshotService, LLMClient
from .models import LLMProviderType, ValidationStatus, ValidationResult, DetectedIssue
from ...models.llm import ValidationRecord
# [DEF:DashboardValidationPlugin:Class]
# @PURPOSE: Plugin for automated dashboard health analysis using LLMs.
# @RELATION: IMPLEMENTS -> backend.src.core.plugin_base.PluginBase
class DashboardValidationPlugin(PluginBase):
@property
def id(self) -> str:
@@ -47,6 +55,11 @@ class DashboardValidationPlugin(PluginBase):
"required": ["dashboard_id", "environment_id", "provider_id"]
}
# [DEF:DashboardValidationPlugin.execute:Function]
# @PURPOSE: Executes the dashboard validation task.
# @PRE: params contains dashboard_id, environment_id, and provider_id.
# @POST: Returns a dictionary with validation results and persists them to the database.
# @SIDE_EFFECT: Captures a screenshot, calls LLM API, and writes to the database.
async def execute(self, params: Dict[str, Any]):
with belief_scope("execute", f"plugin_id={self.id}"):
logger.info(f"Executing {self.name} with params: {params}")
@@ -56,6 +69,16 @@ class DashboardValidationPlugin(PluginBase):
provider_id = params.get("provider_id")
task_id = params.get("_task_id")
# Helper to log to both app logger and task manager logs
def task_log(level: str, message: str, context: Optional[Dict] = None) -> None:
    # Writes to the application logger and mirrors the entry into the task
    # manager's per-task log so the message is visible in the task UI.
    # FIX: replaced bare `except: pass`, which also swallowed BaseException
    # (KeyboardInterrupt/SystemExit), with a narrow Exception catch that at
    # least records the failure at debug level. Mirroring stays best-effort:
    # a broken task-log append must never fail the plugin run itself.
    level_name = level.upper()
    # Fall back to INFO for unknown level names instead of raising AttributeError.
    logger.log(getattr(logging, level_name, logging.INFO), message)
    if task_id:
        from ...dependencies import get_task_manager
        try:
            tm = get_task_manager()
            tm._add_log(task_id, level_name, message, context)
        except Exception:
            logger.debug("task_log: failed to append to task manager logs", exc_info=True)
db = SessionLocal()
try:
# 1. Get Environment
@@ -71,26 +94,78 @@ class DashboardValidationPlugin(PluginBase):
if not db_provider:
raise ValueError(f"LLM Provider {provider_id} not found")
logger.info(f"[DashboardValidationPlugin.execute] Retrieved provider config:")
logger.info(f"[DashboardValidationPlugin.execute] Provider ID: {db_provider.id}")
logger.info(f"[DashboardValidationPlugin.execute] Provider Name: {db_provider.name}")
logger.info(f"[DashboardValidationPlugin.execute] Provider Type: {db_provider.provider_type}")
logger.info(f"[DashboardValidationPlugin.execute] Base URL: {db_provider.base_url}")
logger.info(f"[DashboardValidationPlugin.execute] Default Model: {db_provider.default_model}")
logger.info(f"[DashboardValidationPlugin.execute] Is Active: {db_provider.is_active}")
api_key = llm_service.get_decrypted_api_key(provider_id)
logger.info(f"[DashboardValidationPlugin.execute] API Key decrypted (first 8 chars): {api_key[:8] if api_key and len(api_key) > 8 else 'EMPTY_OR_NONE'}...")
logger.info(f"[DashboardValidationPlugin.execute] API Key Length: {len(api_key) if api_key else 0}")
# Check if API key was successfully decrypted
if not api_key:
raise ValueError(
f"Failed to decrypt API key for provider {provider_id}. "
f"The provider may have been encrypted with a different encryption key. "
f"Please update the provider with a new API key through the UI."
)
# 3. Capture Screenshot
screenshot_service = ScreenshotService(env)
os.makedirs("ss-tools-storage/screenshots", exist_ok=True)
screenshot_path = f"ss-tools-storage/screenshots/{dashboard_id}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.png"
storage_root = config_mgr.get_config().settings.storage.root_path
screenshots_dir = os.path.join(storage_root, "screenshots")
os.makedirs(screenshots_dir, exist_ok=True)
filename = f"{dashboard_id}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.png"
screenshot_path = os.path.join(screenshots_dir, filename)
await screenshot_service.capture_dashboard(dashboard_id, screenshot_path)
# 4. Fetch Logs (Last 100 lines from backend.log)
# 4. Fetch Logs (from Environment /api/v1/log/)
logs = []
log_file = "backend.log"
if os.path.exists(log_file):
with open(log_file, "r") as f:
# Read last 100 lines
all_lines = f.readlines()
logs = all_lines[-100:]
try:
client = SupersetClient(env)
# Calculate time window (last 24 hours)
start_time = (datetime.now() - timedelta(hours=24)).isoformat()
# Construct filter for logs
# Note: We filter by dashboard_id matching the object
query_params = {
"filters": [
{"col": "dashboard_id", "opr": "eq", "value": dashboard_id},
{"col": "dttm", "opr": "gt", "value": start_time}
],
"order_column": "dttm",
"order_direction": "desc",
"page": 0,
"page_size": 100
}
response = client.network.request(
method="GET",
endpoint="/log/",
params={"q": json.dumps(query_params)}
)
if isinstance(response, dict) and "result" in response:
for item in response["result"]:
action = item.get("action", "unknown")
dttm = item.get("dttm", "")
details = item.get("json", "")
logs.append(f"[{dttm}] {action}: {details}")
if not logs:
logs = ["No logs found in backend.log"]
logs = ["No recent logs found for this dashboard."]
except Exception as e:
logger.warning(f"Failed to fetch logs from environment: {e}")
logs = [f"Error fetching remote logs: {str(e)}"]
# 5. Analyze with LLM
llm_client = LLMClient(
@@ -102,6 +177,13 @@ class DashboardValidationPlugin(PluginBase):
analysis = await llm_client.analyze_dashboard(screenshot_path, logs)
# Log analysis summary to task logs for better visibility
task_log("INFO", f"[ANALYSIS_SUMMARY] Status: {analysis['status']}")
task_log("INFO", f"[ANALYSIS_SUMMARY] Summary: {analysis['summary']}")
if analysis.get("issues"):
for i, issue in enumerate(analysis["issues"]):
task_log("INFO", f"[ANALYSIS_ISSUE][{i+1}] {issue.get('severity')}: {issue.get('message')} (Location: {issue.get('location', 'N/A')})")
# 6. Persist Result
validation_result = ValidationResult(
dashboard_id=dashboard_id,
@@ -125,19 +207,24 @@ class DashboardValidationPlugin(PluginBase):
# 7. Notification on failure (US1 / FR-015)
if validation_result.status == ValidationStatus.FAIL:
logger.warning(f"Dashboard {dashboard_id} validation FAILED. Summary: {validation_result.summary}")
task_log("WARNING", f"Dashboard {dashboard_id} validation FAILED. Summary: {validation_result.summary}")
# Placeholder for Email/Pulse notification dispatch
# In a real implementation, we would call a NotificationService here
# with a payload containing the summary and a link to the report.
# Final log to ensure all analysis is visible in task logs
task_log("INFO", f"Validation completed for dashboard {dashboard_id}. Status: {validation_result.status.value}")
return validation_result.dict()
finally:
db.close()
# [/DEF:DashboardValidationPlugin.execute:Function]
# [/DEF:DashboardValidationPlugin:Class]
# [DEF:DocumentationPlugin:Class]
# @PURPOSE: Plugin for automated dataset documentation using LLMs.
# @RELATION: IMPLEMENTS -> backend.src.core.plugin_base.PluginBase
class DocumentationPlugin(PluginBase):
@property
def id(self) -> str:
@@ -166,6 +253,11 @@ class DocumentationPlugin(PluginBase):
"required": ["dataset_id", "environment_id", "provider_id"]
}
# [DEF:DocumentationPlugin.execute:Function]
# @PURPOSE: Executes the dataset documentation task.
# @PRE: params contains dataset_id, environment_id, and provider_id.
# @POST: Returns generated documentation and updates the dataset in Superset.
# @SIDE_EFFECT: Calls LLM API and updates dataset metadata in Superset.
async def execute(self, params: Dict[str, Any]):
with belief_scope("execute", f"plugin_id={self.id}"):
logger.info(f"Executing {self.name} with params: {params}")
@@ -189,7 +281,25 @@ class DocumentationPlugin(PluginBase):
if not db_provider:
raise ValueError(f"LLM Provider {provider_id} not found")
logger.info(f"[DocumentationPlugin.execute] Retrieved provider config:")
logger.info(f"[DocumentationPlugin.execute] Provider ID: {db_provider.id}")
logger.info(f"[DocumentationPlugin.execute] Provider Name: {db_provider.name}")
logger.info(f"[DocumentationPlugin.execute] Provider Type: {db_provider.provider_type}")
logger.info(f"[DocumentationPlugin.execute] Base URL: {db_provider.base_url}")
logger.info(f"[DocumentationPlugin.execute] Default Model: {db_provider.default_model}")
logger.info(f"[DocumentationPlugin.execute] Is Active: {db_provider.is_active}")
api_key = llm_service.get_decrypted_api_key(provider_id)
logger.info(f"[DocumentationPlugin.execute] API Key decrypted (first 8 chars): {api_key[:8] if api_key and len(api_key) > 8 else 'EMPTY_OR_NONE'}...")
logger.info(f"[DocumentationPlugin.execute] API Key Length: {len(api_key) if api_key else 0}")
# Check if API key was successfully decrypted
if not api_key:
raise ValueError(
f"Failed to decrypt API key for provider {provider_id}. "
f"The provider may have been encrypted with a different encryption key. "
f"Please update the provider with a new API key through the UI."
)
# 3. Fetch Metadata (US2 / T024)
from ...core.superset_client import SupersetClient
@@ -235,14 +345,8 @@ class DocumentationPlugin(PluginBase):
"""
# Using a generic chat completion for text-only US2
response = await llm_client.client.chat.completions.create(
model=db_provider.default_model,
messages=[{"role": "user", "content": prompt}],
response_format={"type": "json_object"}
)
import json
doc_result = json.loads(response.choices[0].message.content)
# We use the shared get_json_completion method from LLMClient
doc_result = await llm_client.get_json_completion([{"role": "user", "content": prompt}])
# 5. Update Metadata (US2 / T026)
# This part normally goes to mapping_service, but we implement the logic here for the plugin flow
@@ -267,6 +371,7 @@ class DocumentationPlugin(PluginBase):
finally:
db.close()
# [/DEF:DocumentationPlugin.execute:Function]
# [/DEF:DocumentationPlugin:Class]
# [/DEF:backend.src.plugins.llm_analysis.plugin:Module]
# [/DEF:backend/src/plugins/llm_analysis/plugin.py:Module]

View File

@@ -14,6 +14,7 @@ from ...core.logger import belief_scope, logger
# @PARAM: dashboard_id (str) - ID of the dashboard to validate.
# @PARAM: cron_expression (str) - Standard cron expression for scheduling.
# @PARAM: params (Dict[str, Any]) - Task parameters (environment_id, provider_id).
# @SIDE_EFFECT: Adds a job to the scheduler service.
def schedule_dashboard_validation(dashboard_id: str, cron_expression: str, params: Dict[str, Any]):
with belief_scope("schedule_dashboard_validation", f"dashboard_id={dashboard_id}"):
scheduler = get_scheduler_service()
@@ -39,6 +40,10 @@ def schedule_dashboard_validation(dashboard_id: str, cron_expression: str, param
)
logger.info(f"Scheduled validation for dashboard {dashboard_id} with cron {cron_expression}")
# [DEF:_parse_cron:Function]
# @PURPOSE: Basic cron parser placeholder.
# @PARAM: cron (str) - Cron expression.
# @RETURN: Dict[str, str] - Parsed cron parts.
def _parse_cron(cron: str) -> Dict[str, str]:
# Basic cron parser placeholder
parts = cron.split()
@@ -51,6 +56,5 @@ def _parse_cron(cron: str) -> Dict[str, str]:
"month": parts[3],
"day_of_week": parts[4]
}
# [/DEF:schedule_dashboard_validation:Function]
# [/DEF:backend/src/plugins/llm_analysis/scheduler.py]
# [/DEF:backend/src/plugins/llm_analysis/scheduler.py:Module]

View File

@@ -1,4 +1,4 @@
# [DEF:backend.src.plugins.llm_analysis.service:Module]
# [DEF:backend/src/plugins/llm_analysis/service.py:Module]
# @TIER: STANDARD
# @SEMANTICS: service, llm, screenshot, playwright, openai
# @PURPOSE: Services for LLM interaction and dashboard screenshots.
@@ -6,12 +6,17 @@
# @RELATION: DEPENDS_ON -> playwright
# @RELATION: DEPENDS_ON -> openai
# @RELATION: DEPENDS_ON -> tenacity
# @INVARIANT: Screenshots must be 1920px width and capture full page height.
import asyncio
import base64
import json
import io
from typing import List, Optional, Dict, Any
from PIL import Image
from playwright.async_api import async_playwright
from openai import AsyncOpenAI, RateLimitError
from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type
from openai import AsyncOpenAI, RateLimitError, AuthenticationError as OpenAIAuthenticationError
from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception
from .models import LLMProviderType, ValidationResult, ValidationStatus, DetectedIssue
from ...core.logger import belief_scope, logger
from ...core.config_models import Environment
@@ -19,70 +24,102 @@ from ...core.config_models import Environment
# [DEF:ScreenshotService:Class]
# @PURPOSE: Handles capturing screenshots of Superset dashboards.
class ScreenshotService:
# [DEF:ScreenshotService.__init__:Function]
# @PURPOSE: Initializes the ScreenshotService with environment configuration.
# @PRE: env is a valid Environment object.
def __init__(self, env: Environment):
    """Store the target Superset environment used for login and dashboard navigation."""
    self.env = env
# [/DEF:ScreenshotService.__init__:Function]
# [DEF:capture_dashboard:Function]
# @PURPOSE: Captures a screenshot of a dashboard using Playwright.
# @PARAM: dashboard_id (str) - ID of the dashboard.
# @PARAM: output_path (str) - Path to save the screenshot.
# @RETURN: bool - True if successful.
# [DEF:ScreenshotService.capture_dashboard:Function]
# @PURPOSE: Captures a full-page screenshot of a dashboard using Playwright and CDP.
# @PRE: dashboard_id is a valid string, output_path is a writable path.
# @POST: Returns True if screenshot is saved successfully.
# @SIDE_EFFECT: Launches a browser, performs UI login, switches tabs, and writes a PNG file.
# @UX_STATE: [Navigating] -> Loading dashboard UI
# @UX_STATE: [TabSwitching] -> Iterating through dashboard tabs to trigger lazy loading
# @UX_STATE: [CalculatingHeight] -> Determining dashboard dimensions
# @UX_STATE: [Capturing] -> Executing CDP screenshot
async def capture_dashboard(self, dashboard_id: str, output_path: str) -> bool:
with belief_scope("capture_dashboard", f"dashboard_id={dashboard_id}"):
logger.info(f"Capturing screenshot for dashboard {dashboard_id}")
async with async_playwright() as p:
browser = await p.chromium.launch(headless=True)
context = await browser.new_context(viewport={'width': 1280, 'height': 720})
browser = await p.chromium.launch(
headless=True,
args=[
"--disable-blink-features=AutomationControlled",
"--disable-infobars",
"--no-sandbox"
]
)
# Set a realistic user agent to avoid 403 Forbidden from OpenResty/WAF
user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
# Construct base UI URL from environment (strip /api/v1 suffix)
base_ui_url = self.env.url.rstrip("/")
if base_ui_url.endswith("/api/v1"):
base_ui_url = base_ui_url[:-len("/api/v1")]
# Create browser context with realistic headers
context = await browser.new_context(
viewport={'width': 1280, 'height': 720},
user_agent=user_agent,
extra_http_headers={
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
"Accept-Language": "ru-RU,ru;q=0.9,en-US;q=0.8,en;q=0.7",
"Upgrade-Insecure-Requests": "1",
"Sec-Fetch-Dest": "document",
"Sec-Fetch-Mode": "navigate",
"Sec-Fetch-Site": "none",
"Sec-Fetch-User": "?1"
}
)
logger.info("Browser context created successfully")
page = await context.new_page()
# Bypass navigator.webdriver detection
await page.add_init_script("delete Object.getPrototypeOf(navigator).webdriver")
# 1. Authenticate via API to get tokens
from ...core.superset_client import SupersetClient
client = SupersetClient(self.env)
try:
tokens = client.authenticate()
access_token = tokens.get("access_token")
# 1. Navigate to login page and authenticate
login_url = f"{base_ui_url.rstrip('/')}/login/"
logger.info(f"[DEBUG] Navigating to login page: {login_url}")
# Set JWT in localStorage if possible, or use as cookie
# Superset UI uses session cookies, but we can try to set the Authorization header
# or inject the token into the session.
# For now, we'll use the token to set a cookie if we can determine the name,
# but the most reliable way for Playwright is often still the UI login
# UNLESS we use the API to set a session cookie.
logger.info("API Authentication successful")
except Exception as e:
logger.warning(f"API Authentication failed: {e}. Falling back to UI login.")
response = await page.goto(login_url, wait_until="networkidle", timeout=60000)
if response:
logger.info(f"[DEBUG] Login page response status: {response.status}")
# 2. Navigate to dashboard
dashboard_url = f"{self.env.url}/superset/dashboard/{dashboard_id}/"
logger.info(f"Navigating to {dashboard_url}")
# We still go to the URL first
await page.goto(dashboard_url)
await page.wait_for_load_state("networkidle")
# 3. Check if we are redirected to login
if "/login" in page.url:
logger.info(f"Redirected to login: {page.url}. Filling credentials from Environment.")
# Wait for login form to be ready
await page.wait_for_load_state("domcontentloaded")
# More exhaustive list of selectors for various Superset versions/themes
selectors = {
"username": ['input[name="username"]', 'input#username', 'input[placeholder*="Username"]'],
"password": ['input[name="password"]', 'input#password', 'input[placeholder*="Password"]'],
"submit": ['button[type="submit"]', 'button#submit', '.btn-primary']
"username": ['input[name="username"]', 'input#username', 'input[placeholder*="Username"]', 'input[type="text"]'],
"password": ['input[name="password"]', 'input#password', 'input[placeholder*="Password"]', 'input[type="password"]'],
"submit": ['button[type="submit"]', 'button#submit', '.btn-primary', 'input[type="submit"]']
}
logger.info(f"[DEBUG] Attempting to find login form elements...")
try:
# Find and fill username
u_selector = None
for s in selectors["username"]:
if await page.locator(s).count() > 0:
count = await page.locator(s).count()
logger.info(f"[DEBUG] Selector '{s}': {count} elements found")
if count > 0:
u_selector = s
break
if not u_selector:
raise RuntimeError("Could not find username input field")
# Log all input fields on the page for debugging
all_inputs = await page.locator('input').all()
logger.info(f"[DEBUG] Found {len(all_inputs)} input fields on page")
for i, inp in enumerate(all_inputs[:5]): # Log first 5
inp_type = await inp.get_attribute('type')
inp_name = await inp.get_attribute('name')
inp_id = await inp.get_attribute('id')
logger.info(f"[DEBUG] Input {i}: type={inp_type}, name={inp_name}, id={inp_id}")
raise RuntimeError("Could not find username input field on login page")
logger.info(f"[DEBUG] Filling username field with selector: {u_selector}")
await page.fill(u_selector, self.env.username)
# Find and fill password
@@ -93,8 +130,9 @@ class ScreenshotService:
break
if not p_selector:
raise RuntimeError("Could not find password input field")
raise RuntimeError("Could not find password input field on login page")
logger.info(f"[DEBUG] Filling password field with selector: {p_selector}")
await page.fill(p_selector, self.env.password)
# Click submit
@@ -104,19 +142,28 @@ class ScreenshotService:
s_selector = s
break
logger.info(f"[DEBUG] Clicking submit button with selector: {s_selector}")
await page.click(s_selector)
await page.wait_for_load_state("networkidle")
# Re-verify we are at the dashboard
# Wait for navigation after login
await page.wait_for_load_state("networkidle", timeout=30000)
# Check if login was successful
if "/login" in page.url:
# Check for error messages on page
error_msg = await page.locator(".alert-danger, .error-message").text_content() if await page.locator(".alert-danger, .error-message").count() > 0 else "Unknown error"
raise RuntimeError(f"Login failed after submission: {error_msg}")
logger.error(f"[DEBUG] Login failed. Still on login page. Error: {error_msg}")
debug_path = output_path.replace(".png", "_debug_failed_login.png")
await page.screenshot(path=debug_path)
raise RuntimeError(f"Login failed: {error_msg}. Debug screenshot saved to {debug_path}")
if "/superset/dashboard" not in page.url:
logger.info(f"Redirecting back to dashboard after login: {dashboard_url}")
await page.goto(dashboard_url)
await page.wait_for_load_state("networkidle")
logger.info(f"[DEBUG] Login successful. Current URL: {page.url}")
# Check cookies after successful login
page_cookies = await context.cookies()
logger.info(f"[DEBUG] Cookies after login: {len(page_cookies)}")
for c in page_cookies:
logger.info(f"[DEBUG] Cookie: name={c['name']}, domain={c['domain']}, value={c.get('value', '')[:20]}...")
except Exception as e:
page_title = await page.title()
@@ -124,37 +171,412 @@ class ScreenshotService:
debug_path = output_path.replace(".png", "_debug_failed_login.png")
await page.screenshot(path=debug_path)
raise RuntimeError(f"Login failed: {str(e)}. Debug screenshot saved to {debug_path}")
# Wait a bit more for charts to render
await asyncio.sleep(5)
await page.screenshot(path=output_path, full_page=True)
# 2. Navigate to dashboard
# @UX_STATE: [Navigating] -> Loading dashboard UI
dashboard_url = f"{base_ui_url.rstrip('/')}/superset/dashboard/{dashboard_id}/?standalone=true"
if base_ui_url.startswith("https://") and dashboard_url.startswith("http://"):
dashboard_url = dashboard_url.replace("http://", "https://")
logger.info(f"[DEBUG] Navigating to dashboard: {dashboard_url}")
# Use networkidle to ensure all initial assets are loaded
response = await page.goto(dashboard_url, wait_until="networkidle", timeout=60000)
if response:
logger.info(f"[DEBUG] Dashboard navigation response status: {response.status}, URL: {response.url}")
try:
# Wait for the dashboard grid to be present
await page.wait_for_selector('.dashboard-component, .dashboard-header, [data-test="dashboard-grid"]', timeout=30000)
logger.info(f"[DEBUG] Dashboard container loaded")
# Wait for charts to finish loading (Superset uses loading spinners/skeletons)
# We wait until loading indicators disappear or a timeout occurs
try:
# Wait for loading indicators to disappear
await page.wait_for_selector('.loading, .ant-skeleton, .spinner', state="hidden", timeout=60000)
logger.info(f"[DEBUG] Loading indicators hidden")
except:
logger.warning(f"[DEBUG] Timeout waiting for loading indicators to hide")
# Wait for charts to actually render their content (e.g., ECharts, NVD3)
# We look for common chart containers that should have content
try:
await page.wait_for_selector('.chart-container canvas, .slice_container svg, .superset-chart-canvas, .grid-content .chart-container', timeout=60000)
logger.info(f"[DEBUG] Chart content detected")
except:
logger.warning(f"[DEBUG] Timeout waiting for chart content")
# Additional check: wait for all chart containers to have non-empty content
logger.info(f"[DEBUG] Waiting for all charts to have rendered content...")
await page.wait_for_function("""() => {
const charts = document.querySelectorAll('.chart-container, .slice_container');
if (charts.length === 0) return true; // No charts to wait for
// Check if all charts have rendered content (canvas, svg, or non-empty div)
return Array.from(charts).every(chart => {
const hasCanvas = chart.querySelector('canvas') !== null;
const hasSvg = chart.querySelector('svg') !== null;
const hasContent = chart.innerText.trim().length > 0 || chart.children.length > 0;
return hasCanvas || hasSvg || hasContent;
});
}""", timeout=60000)
logger.info(f"[DEBUG] All charts have rendered content")
# Scroll to bottom and back to top to trigger lazy loading of all charts
logger.info(f"[DEBUG] Scrolling to trigger lazy loading...")
await page.evaluate("""async () => {
const delay = ms => new Promise(resolve => setTimeout(resolve, ms));
for (let i = 0; i < document.body.scrollHeight; i += 500) {
window.scrollTo(0, i);
await delay(100);
}
window.scrollTo(0, 0);
await delay(500);
}""")
except Exception as e:
logger.warning(f"[DEBUG] Dashboard content wait failed: {e}, proceeding anyway after delay")
# Final stabilization delay - increased for complex dashboards
logger.info(f"[DEBUG] Final stabilization delay...")
await asyncio.sleep(15)
# Logic to handle tabs and full-page capture
try:
# 1. Handle Tabs (Recursive switching)
# @UX_STATE: [TabSwitching] -> Iterating through dashboard tabs to trigger lazy loading
processed_tabs = set()
async def switch_tabs(depth=0):
if depth > 3: return # Limit recursion depth
tab_selectors = [
'.ant-tabs-nav-list .ant-tabs-tab',
'.dashboard-component-tabs .ant-tabs-tab',
'[data-test="dashboard-component-tabs"] .ant-tabs-tab'
]
found_tabs = []
for selector in tab_selectors:
found_tabs = await page.locator(selector).all()
if found_tabs: break
if found_tabs:
logger.info(f"[DEBUG][TabSwitching] Found {len(found_tabs)} tabs at depth {depth}")
for i, tab in enumerate(found_tabs):
try:
tab_text = (await tab.inner_text()).strip()
tab_id = f"{depth}_{i}_{tab_text}"
if tab_id in processed_tabs:
continue
if await tab.is_visible():
logger.info(f"[DEBUG][TabSwitching] Switching to tab: {tab_text}")
processed_tabs.add(tab_id)
is_active = "ant-tabs-tab-active" in (await tab.get_attribute("class") or "")
if not is_active:
await tab.click()
await asyncio.sleep(2) # Wait for content to render
await switch_tabs(depth + 1)
except Exception as tab_e:
logger.warning(f"[DEBUG][TabSwitching] Failed to process tab {i}: {tab_e}")
try:
first_tab = found_tabs[0]
if "ant-tabs-tab-active" not in (await first_tab.get_attribute("class") or ""):
await first_tab.click()
await asyncio.sleep(1)
except: pass
await switch_tabs()
# 2. Calculate full height for screenshot
# @UX_STATE: [CalculatingHeight] -> Determining dashboard dimensions
full_height = await page.evaluate("""() => {
const body = document.body;
const html = document.documentElement;
const dashboardContent = document.querySelector('.dashboard-content');
return Math.max(
body.scrollHeight, body.offsetHeight,
html.clientHeight, html.scrollHeight, html.offsetHeight,
dashboardContent ? dashboardContent.scrollHeight + 100 : 0
);
}""")
logger.info(f"[DEBUG] Calculated full height: {full_height}")
# DIAGNOSTIC: Count chart elements before resize
chart_count_before = await page.evaluate("""() => {
return {
chartContainers: document.querySelectorAll('.chart-container, .slice_container').length,
canvasElements: document.querySelectorAll('canvas').length,
svgElements: document.querySelectorAll('.chart-container svg, .slice_container svg').length,
visibleCharts: document.querySelectorAll('.chart-container:visible, .slice_container:visible').length
};
}""")
logger.info(f"[DIAGNOSTIC] Chart elements BEFORE viewport resize: {chart_count_before}")
# DIAGNOSTIC: Capture pre-resize screenshot for comparison
pre_resize_path = output_path.replace(".png", "_preresize.png")
try:
await page.screenshot(path=pre_resize_path, full_page=False, timeout=10000)
import os
pre_resize_size = os.path.getsize(pre_resize_path) if os.path.exists(pre_resize_path) else 0
logger.info(f"[DIAGNOSTIC] Pre-resize screenshot saved: {pre_resize_path} ({pre_resize_size} bytes)")
except Exception as pre_e:
logger.warning(f"[DIAGNOSTIC] Failed to capture pre-resize screenshot: {pre_e}")
logger.info(f"[DIAGNOSTIC] Resizing viewport from current to 1920x{int(full_height)}")
await page.set_viewport_size({"width": 1920, "height": int(full_height)})
# DIAGNOSTIC: Increased wait time and log timing
logger.info("[DIAGNOSTIC] Waiting 10 seconds after viewport resize for re-render...")
await asyncio.sleep(10)
logger.info("[DIAGNOSTIC] Wait completed")
# DIAGNOSTIC: Count chart elements after resize and wait
chart_count_after = await page.evaluate("""() => {
return {
chartContainers: document.querySelectorAll('.chart-container, .slice_container').length,
canvasElements: document.querySelectorAll('canvas').length,
svgElements: document.querySelectorAll('.chart-container svg, .slice_container svg').length,
visibleCharts: document.querySelectorAll('.chart-container:visible, .slice_container:visible').length
};
}""")
logger.info(f"[DIAGNOSTIC] Chart elements AFTER viewport resize + wait: {chart_count_after}")
# DIAGNOSTIC: Check if any charts have error states
chart_errors = await page.evaluate("""() => {
const errors = [];
document.querySelectorAll('.chart-container, .slice_container').forEach((chart, i) => {
const errorEl = chart.querySelector('.error, .alert-danger, .ant-alert-error');
if (errorEl) {
errors.push({index: i, text: errorEl.innerText.substring(0, 100)});
}
});
return errors;
}""")
if chart_errors:
logger.warning(f"[DIAGNOSTIC] Charts with error states detected: {chart_errors}")
else:
logger.info("[DIAGNOSTIC] No chart error states detected")
# 3. Take screenshot using CDP to bypass Playwright's font loading wait
# @UX_STATE: [Capturing] -> Executing CDP screenshot
logger.info("[DEBUG] Attempting full-page screenshot via CDP...")
cdp = await page.context.new_cdp_session(page)
screenshot_data = await cdp.send("Page.captureScreenshot", {
"format": "png",
"fromSurface": True,
"captureBeyondViewport": True
})
image_data = base64.b64decode(screenshot_data["data"])
with open(output_path, 'wb') as f:
f.write(image_data)
# DIAGNOSTIC: Verify screenshot file
import os
final_size = os.path.getsize(output_path) if os.path.exists(output_path) else 0
logger.info(f"[DIAGNOSTIC] Final screenshot saved: {output_path}")
logger.info(f"[DIAGNOSTIC] Final screenshot size: {final_size} bytes ({final_size / 1024:.2f} KB)")
# DIAGNOSTIC: Get image dimensions
try:
with Image.open(output_path) as final_img:
logger.info(f"[DIAGNOSTIC] Final screenshot dimensions: {final_img.width}x{final_img.height}")
except Exception as img_err:
logger.warning(f"[DIAGNOSTIC] Could not read final image dimensions: {img_err}")
logger.info(f"Full-page screenshot saved to {output_path} (via CDP)")
except Exception as e:
logger.error(f"[DEBUG] Full-page/Tab capture failed: {e}")
try:
await page.screenshot(path=output_path, full_page=True, timeout=10000)
except Exception as e2:
logger.error(f"[DEBUG] Fallback screenshot also failed: {e2}")
await page.screenshot(path=output_path, timeout=5000)
await browser.close()
logger.info(f"Screenshot saved to {output_path}")
return True
# [/DEF:ScreenshotService.capture_dashboard:Function]
# [/DEF:ScreenshotService:Class]
# [DEF:LLMClient:Class]
# @PURPOSE: Wrapper for LLM provider APIs.
class LLMClient:
# [DEF:LLMClient.__init__:Function]
# @PURPOSE: Initializes the LLMClient with provider settings.
# @PRE: api_key, base_url, and default_model are non-empty strings.
def __init__(self, provider_type: LLMProviderType, api_key: str, base_url: str, default_model: str):
self.provider_type = provider_type
self.api_key = api_key
self.base_url = base_url
self.default_model = default_model
self.client = AsyncOpenAI(api_key=api_key, base_url=base_url)
# [DEF:analyze_dashboard:Function]
# @PURPOSE: Sends dashboard data to LLM for analysis.
# DEBUG: Log initialization parameters (without exposing full API key)
logger.info(f"[LLMClient.__init__] Initializing LLM client:")
logger.info(f"[LLMClient.__init__] Provider Type: {provider_type}")
logger.info(f"[LLMClient.__init__] Base URL: {base_url}")
logger.info(f"[LLMClient.__init__] Default Model: {default_model}")
logger.info(f"[LLMClient.__init__] API Key (first 8 chars): {api_key[:8] if api_key and len(api_key) > 8 else 'EMPTY_OR_NONE'}...")
logger.info(f"[LLMClient.__init__] API Key Length: {len(api_key) if api_key else 0}")
self.client = AsyncOpenAI(api_key=api_key, base_url=base_url)
# [/DEF:LLMClient.__init__:Function]
# [DEF:LLMClient.get_json_completion:Function]
# @PURPOSE: Helper to handle LLM calls with JSON mode and fallback parsing.
# @PRE: messages is a list of valid message dictionaries.
# @POST: Returns a parsed JSON dictionary.
# @SIDE_EFFECT: Calls external LLM API.
def _should_retry(exception: Exception) -> bool:
"""Custom retry predicate that excludes authentication errors."""
# Don't retry on authentication errors
if isinstance(exception, OpenAIAuthenticationError):
return False
# Retry on rate limit errors and other exceptions
return isinstance(exception, (RateLimitError, Exception))
@retry(
stop=stop_after_attempt(5),
wait=wait_exponential(multiplier=2, min=5, max=60),
retry=retry_if_exception_type((Exception, RateLimitError))
retry=retry_if_exception(_should_retry),
reraise=True
)
async def get_json_completion(self, messages: List[Dict[str, Any]]) -> Dict[str, Any]:
with belief_scope("get_json_completion"):
response = None
try:
try:
logger.info(f"[get_json_completion] Attempting LLM call with JSON mode for model: {self.default_model}")
logger.info(f"[get_json_completion] Base URL being used: {self.base_url}")
logger.info(f"[get_json_completion] Number of messages: {len(messages)}")
logger.info(f"[get_json_completion] API Key present: {bool(self.api_key and len(self.api_key) > 0)}")
response = await self.client.chat.completions.create(
model=self.default_model,
messages=messages,
response_format={"type": "json_object"}
)
except Exception as e:
if "JSON mode is not enabled" in str(e) or "400" in str(e):
logger.warning(f"[get_json_completion] JSON mode failed or not supported: {str(e)}. Falling back to plain text response.")
response = await self.client.chat.completions.create(
model=self.default_model,
messages=messages
)
else:
raise e
logger.debug(f"[get_json_completion] LLM Response: {response}")
except OpenAIAuthenticationError as e:
logger.error(f"[get_json_completion] Authentication error: {str(e)}")
# Do not retry on auth errors - re-raise to stop retry
raise
except RateLimitError as e:
logger.warning(f"[get_json_completion] Rate limit hit: {str(e)}")
# Extract retry_delay from error metadata if available
retry_delay = 5.0 # Default fallback
try:
# Based on logs, the raw response is in e.body or e.response.json()
# The logs show 'metadata': {'raw': '...'} which suggests a proxy or specific client wrapper
# Let's try to find the 'retryDelay' in the error message or response
import re
# Try to find "retryDelay": "XXs" in the string representation of the error
error_str = str(e)
match = re.search(r'"retryDelay":\s*"(\d+)s"', error_str)
if match:
retry_delay = float(match.group(1))
else:
# Try to parse from response if it's a standard OpenAI-like error with body
if hasattr(e, 'body') and isinstance(e.body, dict):
# Some providers put it in details
details = e.body.get('error', {}).get('details', [])
for detail in details:
if detail.get('@type') == 'type.googleapis.com/google.rpc.RetryInfo':
delay_str = detail.get('retryDelay', '5s')
retry_delay = float(delay_str.rstrip('s'))
break
except Exception as parse_e:
logger.debug(f"[get_json_completion] Failed to parse retry delay: {parse_e}")
# Add a small safety margin (0.5s) as requested
wait_time = retry_delay + 0.5
logger.info(f"[get_json_completion] Waiting for {wait_time}s before retry...")
await asyncio.sleep(wait_time)
raise
except Exception as e:
logger.error(f"[get_json_completion] LLM call failed: {str(e)}")
raise
if not response or not hasattr(response, 'choices') or not response.choices:
raise RuntimeError(f"Invalid LLM response: {response}")
content = response.choices[0].message.content
logger.debug(f"[get_json_completion] Raw content to parse: {content}")
try:
return json.loads(content)
except json.JSONDecodeError:
logger.warning("[get_json_completion] Failed to parse JSON directly, attempting to extract from code blocks")
if "```json" in content:
json_str = content.split("```json")[1].split("```")[0].strip()
return json.loads(json_str)
elif "```" in content:
json_str = content.split("```")[1].split("```")[0].strip()
return json.loads(json_str)
else:
raise
# [/DEF:LLMClient.get_json_completion:Function]
# [DEF:LLMClient.analyze_dashboard:Function]
# @PURPOSE: Sends dashboard data (screenshot + logs) to LLM for health analysis.
# @PRE: screenshot_path exists, logs is a list of strings.
# @POST: Returns a structured analysis dictionary (status, summary, issues).
# @SIDE_EFFECT: Reads screenshot file and calls external LLM API.
async def analyze_dashboard(self, screenshot_path: str, logs: List[str]) -> Dict[str, Any]:
with belief_scope("analyze_dashboard"):
import base64
# Optimize image to reduce token count (US1 / T023)
# Gemini/Gemma models have limits on input tokens, and large images contribute significantly.
try:
with Image.open(screenshot_path) as img:
# Convert to RGB if necessary
if img.mode in ("RGBA", "P"):
img = img.convert("RGB")
# Resize if too large (max 1024px width while maintaining aspect ratio)
# We reduce width further to 1024px to stay within token limits for long dashboards
max_width = 1024
if img.width > max_width or img.height > 2048:
# Calculate scaling factor to fit within 1024x2048
scale = min(max_width / img.width, 2048 / img.height)
if scale < 1.0:
new_width = int(img.width * scale)
new_height = int(img.height * scale)
img = img.resize((new_width, new_height), Image.Resampling.LANCZOS)
logger.info(f"[analyze_dashboard] Resized image from {img.width}x{img.height} to {new_width}x{new_height}")
# Compress and convert to base64
buffer = io.BytesIO()
# Lower quality to 60% to further reduce payload size
img.save(buffer, format="JPEG", quality=60, optimize=True)
base_64_image = base64.b64encode(buffer.getvalue()).decode('utf-8')
logger.info(f"[analyze_dashboard] Optimized image size: {len(buffer.getvalue()) / 1024:.2f} KB")
except Exception as img_e:
logger.warning(f"[analyze_dashboard] Image optimization failed: {img_e}. Using raw image.")
with open(screenshot_path, "rb") as image_file:
base64_image = base64.b64encode(image_file.read()).decode('utf-8')
base_64_image = base64.b64encode(image_file.read()).decode('utf-8')
log_text = "\n".join(logs)
prompt = f"""
@@ -177,11 +599,7 @@ class LLMClient:
}}
"""
logger.debug(f"[analyze_dashboard] Calling LLM with model: {self.default_model}")
try:
response = await self.client.chat.completions.create(
model=self.default_model,
messages=[
messages = [
{
"role": "user",
"content": [
@@ -189,36 +607,23 @@ class LLMClient:
{
"type": "image_url",
"image_url": {
"url": f"data:image/jpeg;base64,{base64_image}"
"url": f"data:image/jpeg;base64,{base_64_image}"
}
}
]
}
],
response_format={"type": "json_object"}
)
logger.debug(f"[analyze_dashboard] LLM Response: {response}")
except RateLimitError as e:
logger.warning(f"[analyze_dashboard] Rate limit hit: {str(e)}")
raise # tenacity will handle retry
except Exception as e:
logger.error(f"[analyze_dashboard] LLM call failed: {str(e)}")
raise
]
if not response or not hasattr(response, 'choices') or not response.choices:
error_info = getattr(response, 'error', 'No choices in response')
logger.error(f"[analyze_dashboard] Invalid LLM response. Error info: {error_info}")
try:
return await self.get_json_completion(messages)
except Exception as e:
logger.error(f"[analyze_dashboard] Failed to get analysis: {str(e)}")
return {
"status": "FAIL",
"summary": f"Failed to get response from LLM: {error_info}",
"summary": f"Failed to get response from LLM: {str(e)}",
"issues": [{"severity": "FAIL", "message": "LLM provider returned empty or invalid response"}]
}
import json
result = json.loads(response.choices[0].message.content)
return result
# [/DEF:analyze_dashboard:Function]
# [/DEF:LLMClient.analyze_dashboard:Function]
# [/DEF:LLMClient:Class]
# [/DEF:backend.src.plugins.llm_analysis.service:Module]
# [/DEF:backend/src/plugins/llm_analysis/service.py:Module]

View File

@@ -19,7 +19,7 @@ import os
class EncryptionManager:
# @INVARIANT: Uses a secret key from environment or a default one (fallback only for dev).
def __init__(self):
self.key = os.getenv("ENCRYPTION_KEY", "7_u-l7-B-j9f5_V5z-5-5-5-5-5-5-5-5-5-5-5-5-5=").encode()
self.key = os.getenv("ENCRYPTION_KEY", "ZcytYzi0iHIl4Ttr-GdAEk117aGRogkGvN3wiTxrPpE=").encode()
self.fernet = Fernet(self.key)
def encrypt(self, data: str) -> str:
@@ -80,7 +80,8 @@ class LLMProviderService:
db_provider.provider_type = config.provider_type.value
db_provider.name = config.name
db_provider.base_url = config.base_url
if config.api_key != "********":
# Only update API key if provided (not None and not empty)
if config.api_key is not None and config.api_key != "":
db_provider.api_key = self.encryption.encrypt(config.api_key)
db_provider.default_model = config.default_model
db_provider.is_active = config.is_active
@@ -108,8 +109,19 @@ class LLMProviderService:
with belief_scope("get_decrypted_api_key"):
db_provider = self.get_provider(provider_id)
if not db_provider:
logger.warning(f"[get_decrypted_api_key] Provider {provider_id} not found in database")
return None
logger.info(f"[get_decrypted_api_key] Provider found: {db_provider.id}")
logger.info(f"[get_decrypted_api_key] Encrypted API key length: {len(db_provider.api_key) if db_provider.api_key else 0}")
try:
decrypted_key = self.encryption.decrypt(db_provider.api_key)
logger.info(f"[get_decrypted_api_key] Decryption successful, key length: {len(decrypted_key) if decrypted_key else 0}")
return decrypted_key
except Exception as e:
logger.error(f"[get_decrypted_api_key] Decryption failed: {str(e)}")
return None
return self.encryption.decrypt(db_provider.api_key)
# [/DEF:get_decrypted_api_key:Function]
# [/DEF:LLMProviderService:Class]

Binary file not shown.

Before

Width:  |  Height:  |  Size: 22 KiB

Binary file not shown.

View File

@@ -0,0 +1,78 @@
#!/usr/bin/env python3
"""Debug script to test Superset API authentication"""
import json
import requests
from pprint import pprint
from src.core.superset_client import SupersetClient
from src.core.config_manager import ConfigManager
def main():
    """Diagnose Superset API and UI authentication for the first configured environment.

    Walks through: API login, session cookie inspection, and a UI page fetch
    with the authenticated session. Secrets (tokens, cookie values) are masked
    to their last 4 characters so the output is safe to paste into logs/issues
    (mirrors the FR-028 masking rule used elsewhere in the project).
    """
    def _mask(value):
        # Show only the last 4 characters of a secret; never echo it whole.
        if not value:
            return "N/A"
        text = str(value)
        return f"...{text[-4:]}" if len(text) > 4 else "****"

    print("Debugging Superset API authentication...")
    config = ConfigManager()

    # Select first available environment; nothing to test without one.
    environments = config.get_environments()
    if not environments:
        print("No environments configured")
        return
    env = environments[0]
    print(f"\nTesting environment: {env.name}")
    print(f"URL: {env.url}")

    try:
        # --- API client authentication ---
        print("\n--- Testing API Authentication ---")
        client = SupersetClient(env)
        tokens = client.authenticate()
        print("\nAPI Auth Success!")
        print(f"Access Token: {_mask(tokens.get('access_token'))}")
        print(f"CSRF Token: {_mask(tokens.get('csrf_token'))}")

        # --- Session cookies (names shown, values masked) ---
        print("\n--- Session Cookies ---")
        for cookie in client.network.session.cookies:
            print(f"{cookie.name}={_mask(cookie.value)}")

        # --- UI access with the same (cookie-carrying) session ---
        print("\n--- Testing UI Access ---")
        ui_url = env.url.rstrip('/').replace('/api/v1', '')
        print(f"UI URL: {ui_url}")
        ui_response = client.network.session.get(ui_url, timeout=30, allow_redirects=True)
        print(f"Status Code: {ui_response.status_code}")
        print(f"URL: {ui_response.url}")

        print("\n--- Response Headers ---")
        pprint(dict(ui_response.headers))
        print(f"\n--- Response Content Preview (200 chars) ---")
        print(repr(ui_response.text[:200]))

        if ui_response.status_code == 200:
            print("\nUI Access: Success")
            # A redirect to (or mention of) the login page means the API
            # session cookie did not carry over to the UI.
            print("\n--- Checking if login is required ---")
            if "login" in ui_response.url.lower() or "login" in ui_response.text.lower():
                print("❌ Not logged in to UI")
            else:
                print("✅ Logged in to UI")
    except Exception as e:
        print(f"\n❌ Error: {type(e).__name__}: {e}")
        import traceback
        print("\nStack Trace:")
        print(traceback.format_exc())


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,44 @@
#!/usr/bin/env python3
"""Test script to debug API key decryption issue."""
from src.core.database import SessionLocal
from src.models.llm import LLMProvider
from cryptography.fernet import Fernet
import os
# Resolve the encryption key exactly as the backend's EncryptionManager does
# (same env var, same dev fallback).
key = os.getenv("ENCRYPTION_KEY", "ZcytYzi0iHIl4Ttr-GdAEk117aGRogkGvN3wiTxrPpE=").encode()
# Never print key material itself; the length is enough to spot a malformed key.
print(f"Encryption key length: {len(key)}")

# Create Fernet instance (raises ValueError early if the key is not valid base64/32 bytes)
fernet = Fernet(key)

# Provider id may be supplied on the command line; defaults to the known test id.
import sys
provider_id = sys.argv[1] if len(sys.argv) > 1 else "6c899741-4108-4196-aea4-f38ad2f0150e"

# Get provider from database
db = SessionLocal()
try:
    provider = db.query(LLMProvider).filter(LLMProvider.id == provider_id).first()
    if provider:
        print("\nProvider found:")
        print(f"  ID: {provider.id}")
        print(f"  Name: {provider.name}")
        # Report only lengths; never echo ciphertext or plaintext key material.
        print(f"  Encrypted API Key Length: {len(provider.api_key) if provider.api_key else 0}")

        # Test decryption
        print("\nAttempting decryption...")
        try:
            decrypted = fernet.decrypt(provider.api_key.encode()).decode()
            print("Decryption successful!")
            print(f"  Decrypted key length: {len(decrypted)}")
            # Mask to the last 4 characters (FR-028 style) instead of leaking a prefix.
            masked = f"...{decrypted[-4:]}" if len(decrypted) > 4 else "****"
            print(f"  Decrypted key (masked): {masked}")
            print(f"  Decrypted key is empty: {len(decrypted) == 0}")
        except Exception as e:
            print(f"Decryption failed with error: {e}")
            print(f"Error type: {type(e).__name__}")
            import traceback
            traceback.print_exc()
    else:
        print("Provider not found")
finally:
    # Always release the DB session, even if the query itself raises.
    db.close()

View File

@@ -0,0 +1 @@
[{"key[": 20, ")\n\n# Create Fernet instance\nfernet = Fernet(key)\n\n# Test encrypting an empty string\nempty_encrypted = fernet.encrypt(b\"": ".", "print(f": "nEncrypted empty string: {empty_encrypted"}, {"test-api-key-12345\"\ntest_encrypted = fernet.encrypt(test_key.encode()).decode()\nprint(f": "nEncrypted test key: {test_encrypted"}, {"gAAAAABphhwSZie0OwXjJ78Fk-c4Uo6doNJXipX49AX7Bypzp4ohiRX3hXPXKb45R1vhNUOqbm6Ke3-eRwu_KdWMZ9chFBKmqw==\"\nprint(f": "nStored encrypted key: {stored_key"}, {"len(stored_key)}": "Check if stored key matches empty string encryption\nif stored_key == empty_encrypted:\n print(", "string!": "else:\n print(", "print(f": "mpty string encryption: {empty_encrypted"}, {"stored_key}": "Try to decrypt the stored key\ntry:\n decrypted = fernet.decrypt(stored_key.encode()).decode()\n print(f", "print(f": "ecrypted key length: {len(decrypted)"}, {")\nexcept Exception as e:\n print(f": "nDecryption failed with error: {e"}]

View File

@@ -0,0 +1,38 @@
# Архитектурное решение: Standalone сервис vs Интеграция в Superset
## Рекомендация: Оставить отдельным сервисом
Хотя интеграция непосредственно в Superset может показаться привлекательной для создания "единого интерфейса", **настоятельно рекомендуется оставить `ss-tools` отдельным сервисом** по следующим архитектурным и техническим причинам.
### 1. Архитектурный паттерн: Оркестратор против Инстанса
* **Проблема:** `ss-tools` действует как **"Менеджер менеджеров"**. Его основная ценность заключается в подключении и перемещении данных *между* различными окружениями Superset (Dev → Stage → Prod).
* **Почему разделение лучше:** Если встроить эту логику в Superset, придется сделать один конкретный инстанс (например, "Dev") "главным контроллером" для остальных. Если этот инстанс упадет, вы потеряете возможности управления. Отдельный инструмент находится *над* окружениями, рассматривая их все как равные endpoints.
### 2. Несовместимость технологического стека
* **Superset:** Бэкенд на **Flask** (Python) с фронтендом на **React**.
* **ss-tools:** Бэкенд на **FastAPI** (Python) с фронтендом на **SvelteKit**.
* **Цена миграции:** Перенос в Superset потребует **полного переписывания** всего фронтенда с Svelte на React, чтобы он мог работать как плагин или расширение Superset. Также потребуется рефакторинг внедрения зависимостей (Dependency Injection) из FastAPI, чтобы соответствовать структуре Flask-AppBuilder в Superset.
### 3. Цикл релизов и гибкость
* **Superset:** Имеет сложный и медленный цикл релизов. Внедрение кастомных фич в основной репозиторий (upstream) затруднительно, а поддержка собственного "форка" Superset — это кошмар сопровождения (конфликты слияния при каждом обновлении).
* **ss-tools:** Как отдельный микросервис, вы можете развертывать новые функции (например, новые LLM плагины или Git-процессы) мгновенно, не беспокоясь о том, что сломаете основную BI-платформу.
### 4. Безопасность и изоляция
* **Права доступа:** `ss-tools` имеет свои собственные страницы администрирования (`AdminRolesPage` и `AdminUsersPage`), управляющие правами специально для задач *деплоя* и *резервного копирования*. Это задачи DevOps, а не бизнес-аналитики (BI). Смешивание привилегий развертывания с привилегиями просмотра данных в одной системе увеличивает "радиус поражения" в случае проблем с безопасностью.
### Техническая реализуемость (если все же решиться)
Это технически возможно, но процесс будет выглядеть так:
1. **Frontend:** Придется переписать все компоненты Svelte (`DashboardGrid`, `MappingTable` и т.д.) как компоненты React, чтобы они могли жить внутри `superset-frontend`.
2. **Backend:** Вероятно, придется реализовать это как серверное **расширение Superset** (используя Flask Blueprints), создав новые API endpoints, которые будет вызывать фронтенд на React.
3. **Database:** Придется добавить таблицы проекта (`task_records`, `connection_configs`) в базу данных метаданных Superset через миграции Alembic.
### Итоговая таблица
| Характеристика | Отдельный сервис (Текущий) | Интеграция в Superset |
| :--- | :--- | :--- |
| **Скорость разработки** | 🚀 Высокая (Независимая) | 🐢 Низкая (Связана с монолитом) |
| **Технологический стек** | Современный (FastAPI + SvelteKit) | Legacy/Строгий (Flask + React) |
| **Управление мульти-окружением** | ✅ Нативно (Спроектирован для этого) | ⚠️ Неудобно (Один инстанс управляет всеми) |
| **Поддержка (Maintenance)** | ✅ Низкая (Обновление в любое время) | ❌ Высокая (Rebase при каждом обновлении Superset) |
**Вывод:** Проект уже хорошо спроектирован как специализированный инструмент DevOps (слой `DevOps/Tooling` в вашей семантической карте). Слияние его с Superset, скорее всего, приведет к техническому долгу и регрессии функциональности.

View File

@@ -75,8 +75,15 @@
const method = editingProvider ? 'PUT' : 'POST';
const endpoint = editingProvider ? `/llm/providers/${editingProvider.id}` : '/llm/providers';
// When editing, only include api_key if user entered a new one
const submitData = { ...formData };
if (editingProvider && !submitData.api_key) {
// If editing and api_key is empty, don't send it (backend will keep existing)
delete submitData.api_key;
}
try {
await requestApi(endpoint, method, formData);
await requestApi(endpoint, method, submitData);
showForm = false;
resetForm();
onSave();

File diff suppressed because it is too large Load Diff

View File

@@ -27,6 +27,7 @@ Stores the outcome of a dashboard validation task.
| timestamp | DateTime | Yes | When the validation ran |
| status | Enum | Yes | `PASS`, `WARN`, `FAIL` |
| screenshot_path | String | No | Path to the captured screenshot (if stored) |
| screenshot_metadata | JSON | No | `{width: 1920, height: dynamic, tabs_processed: []}` |
| issues | JSON | Yes | List of detected issues `[{severity, message, location}]` |
| raw_response | Text | No | Full LLM response for debugging |

View File

@@ -23,6 +23,25 @@ This feature implements two new plugins for the `ss-tools` platform: `DashboardV
**Constraints**: Must integrate with existing `PluginBase` and `TaskManager`. Secure storage for API keys.
**Scale/Scope**: Support for configurable LLM providers, scheduled tasks, and notification integration.
## Implementation Notes
### Screenshot Capture Implementation
The screenshot service uses Playwright with Chrome DevTools Protocol (CDP) to avoid font loading timeouts in headless mode. Key implementation details:
- **Full Page Capture**: Uses CDP `Page.captureScreenshot` with `captureBeyondViewport: true` and `fromSurface: true`
- **Tab Switching**: Implements recursive tab switching to trigger lazy-loaded chart rendering on multi-tab dashboards
- **Authentication**: Uses direct UI login flow (navigating to `/login/` and filling credentials) instead of API cookie injection for better reliability
- **Resolution**: 1920px width with dynamic full page height calculation
### Authentication Flow
The service authenticates via Playwright UI login rather than API authentication:
1. Navigate to `/login/` page
2. Fill username/password fields (supports multiple Superset versions)
3. Click login button
4. Verify successful redirect to `/superset/welcome/`
5. Navigate to dashboard with valid session
This approach is more reliable than API-to-UI cookie injection which was causing 403 Forbidden errors.
## Constitution Check
*GATE: Must pass before Phase 0 research. Re-check after Phase 1 design.*
@@ -60,7 +79,8 @@ backend/
│ │ ├── llm_analysis/ # New Plugin Directory
│ │ │ ├── __init__.py
│ │ │ ├── plugin.py # Implements DashboardValidationPlugin & DocumentationPlugin
│ │ │ ├── service.py # LLM interaction logic
│ │ │ ├── service.py # LLM interaction logic + ScreenshotService (CDP, tab switching)
│ │ │ ├── scheduler.py # Scheduled validation task handler
│ │ │ └── models.py # Pydantic models for LLM config/results
│ │ └── git/ # Existing Git Plugin
│ │ └── ... # Update for commit message generation

View File

@@ -82,6 +82,8 @@ As a Developer, I want the system to suggest commit messages based on changes di
- What happens if the dashboard screenshot cannot be generated? (System should proceed with logs only or fail depending on configuration).
- What happens if the context (logs/metadata) exceeds the LLM's token limit? (System should truncate or summarize input).
- How does the system handle missing API keys? (Task should fail immediately with a configuration error).
- What happens if the dashboard has multiple tabs with lazy-loaded charts? (System must switch through all tabs recursively to trigger chart rendering before capture).
- What happens if Playwright encounters font loading timeouts in headless mode? (System must use CDP `Page.captureScreenshot` to bypass Playwright's internal timeout mechanism).
## Requirements *(mandatory)*
@@ -92,9 +94,11 @@ As a Developer, I want the system to suggest commit messages based on changes di
- **FR-028**: The system MUST mask all API keys in the UI and logs, displaying only the last 4 characters (e.g., `sk-...1234`). [Security]
- **FR-003**: System MUST implement a `DashboardValidationPlugin` that integrates with the existing `PluginBase` architecture.
- **FR-004**: `DashboardValidationPlugin` MUST accept a dashboard identifier as input.
- **FR-005**: `DashboardValidationPlugin` MUST be capable of retrieving a visual representation (screenshot) of the dashboard. The visual representation MUST be a PNG or JPEG image with a minimum resolution of 1280x720px to ensure legibility for the LLM. [Clarity]
- **FR-005**: `DashboardValidationPlugin` MUST be capable of retrieving a visual representation (screenshot) of the dashboard. The visual representation MUST be a PNG image with a resolution of 1920px width and full page height to ensure all dashboard content is captured. [Clarity]
- **FR-016**: System MUST support configurable screenshot strategies: 'Headless Browser' (default, high accuracy) and 'API Thumbnail' (fallback/fast).
- **FR-006**: `DashboardValidationPlugin` MUST retrieve recent execution logs associated with the dashboard, limited to the last 100 lines or 24 hours (whichever is smaller) to prevent token overflow. [Reliability]
- **FR-030**: The screenshot capture MUST use Playwright with Chrome DevTools Protocol (CDP) to avoid font loading timeouts in headless mode.
- **FR-031**: The screenshot capture MUST implement recursive tab switching to trigger lazy-loaded chart rendering on multi-tab dashboards before capturing.
- **FR-006**: `DashboardValidationPlugin` MUST retrieve recent execution logs associated with the dashboard from the Environment API (e.g., `/api/v1/log/`), limited to the last 100 lines or the last 24 hours (whichever limit is reached first) to prevent token overflow. [Reliability]
- **FR-007**: `DashboardValidationPlugin` MUST combine visual and text data to prompt a Multimodal LLM for analysis. The analysis output MUST be structured as a JSON object containing `status` (Pass/Fail), `issues` (list of strings), and `summary` (text) to enable structured UI presentation. [Clarity]
- **FR-008**: System MUST implement a `DocumentationPlugin` (or similar) for documenting datasets and dashboards.
- **FR-009**: `DocumentationPlugin` MUST retrieve schema and metadata for the target asset.
@@ -131,3 +135,5 @@ As a Developer, I want the system to suggest commit messages based on changes di
- **SC-002**: A dashboard validation task completes within 90 seconds (assuming standard LLM latency).
- **SC-003**: The system successfully processes a multimodal prompt (image + text) and returns a structured analysis.
- **SC-004**: Generated documentation for a standard dataset contains descriptions for at least 80% of the columns (based on LLM capability, but pipeline must support it).
- **SC-005**: Screenshots capture full dashboard content including all tabs (1920px width, full height) without font loading timeouts.
- **SC-006**: Analysis results are displayed in task logs with clear `[ANALYSIS_SUMMARY]` and `[ANALYSIS_ISSUE]` markers for easy parsing.

View File

@@ -30,7 +30,7 @@
**Goal**: Implement core services and shared infrastructure.
- [x] T008 Implement `LLMProviderService` in `backend/src/services/llm_provider.py` (CRUD for providers, AES-256 encryption)
- [x] T009 Implement `ScreenshotService` in `backend/src/plugins/llm_analysis/service.py` (Playwright + API strategies, fallback logic, min 1280x720px resolution)
- [x] T009 Implement `ScreenshotService` in `backend/src/plugins/llm_analysis/service.py` (Playwright + API strategies, fallback logic, 1920px width, full page height, CDP screenshot, recursive tab switching)
- [x] T010 Implement `LLMClient` in `backend/src/plugins/llm_analysis/service.py` (OpenAI SDK wrapper, retry logic with tenacity, rate limit handling)
- [x] T011 Create `backend/src/plugins/llm_analysis/plugin.py` with `PluginBase` implementation stubs
- [x] T012 Define database schema updates for `LLMProviderConfig` in `backend/src/models/llm.py` (or appropriate location)
@@ -43,7 +43,7 @@
- [x] T014 [US1] Implement `validate_dashboard` task logic in `backend/src/plugins/llm_analysis/plugin.py`
- [x] T015 [US1] Implement log retrieval logic (fetch recent logs, limit 100 lines/24h) in `backend/src/plugins/llm_analysis/plugin.py`
- [x] T016 [US1] Construct multimodal prompt (image + text) in `backend/src/plugins/llm_analysis/service.py` (implement PII/credential filtering)
- [x] T017 [US1] Implement result parsing and persistence (ValidationResult) in `backend/src/plugins/llm_analysis/plugin.py` (ensure JSON structure: status, issues, summary)
- [x] T017 [US1] Implement result parsing and persistence (ValidationResult) in `backend/src/plugins/llm_analysis/plugin.py` (ensure JSON structure: status, issues, summary, log results to task output with `[ANALYSIS_SUMMARY]` and `[ANALYSIS_ISSUE]` markers)
- [x] T018 [US1] Add `validate` endpoint trigger in `backend/src/api/routes/tasks.py` (or reuse existing dispatch)
- [x] T019 [US1] Implement notification dispatch (Email/Pulse) on failure in `backend/src/plugins/llm_analysis/plugin.py` (Summary + Link format)
- [x] T020 [US1] Create `frontend/src/components/llm/ValidationReport.svelte` for viewing results

View File

@@ -247,6 +247,14 @@
- 📝 Fetches AD mappings and roles from the backend to populate the UI.
- ƒ **handleCreateMapping** (`Function`)
- 📝 Submits a new AD Group to Role mapping to the backend.
- 🧩 **LLMSettingsPage** (`Component`)
- 📝 Admin settings page for LLM provider configuration.
- 🏗️ Layer: UI
- 📦 **+page** (`Module`) `[TRIVIAL]`
- 📝 Auto-generated module for frontend/src/routes/admin/settings/llm/+page.svelte
- 🏗️ Layer: Unknown
- ƒ **fetchProviders** (`Function`) `[TRIVIAL]`
- 📝 Auto-detected function (orphan)
- 🧩 **MigrationDashboard** (`Component`)
- 📝 Main dashboard for configuring and starting migrations.
- 🏗️ Layer: Page
@@ -492,10 +500,12 @@
- 📝 Displays a grid of dashboards with selection and pagination.
- 🏗️ Layer: Component
- 🔒 Invariant: Selected IDs must be a subset of available dashboards.
- 📥 Props: dashboards: DashboardMetadata[] , selectedIds: number[]
- 📥 Props: dashboards: DashboardMetadata[] , selectedIds: number[] , environmentId: string
- ⚡ Events: selectionChanged
- ➡️ WRITES_TO `t`
- ⬅️ READS_FROM `t`
- ƒ **handleValidate** (`Function`)
- 📝 Triggers dashboard validation task.
- ƒ **handleSort** (`Function`)
- 📝 Toggles sort direction or changes sort column.
- ƒ **handleSelectionChange** (`Function`)
@@ -647,6 +657,13 @@
- 📝 Fetches environments and saved connections.
- ƒ **handleRunMapper** (`Function`)
- 📝 Triggers the MapperPlugin task.
- ƒ **handleGenerateDocs** (`Function`)
- 📝 Triggers the LLM Documentation task.
- 📦 **MapperTool** (`Module`) `[TRIVIAL]`
- 📝 Auto-generated module for frontend/src/components/tools/MapperTool.svelte
- 🏗️ Layer: Unknown
- ƒ **handleApplyDoc** (`Function`) `[TRIVIAL]`
- 📝 Auto-detected function (orphan)
- 🧩 **DebugTool** (`Component`)
- 📝 UI component for system diagnostics and debugging API responses.
- 🏗️ Layer: UI
@@ -692,6 +709,8 @@
- 🏗️ Layer: Component
- 📥 Props: dashboardId: any, show: any
- ⚡ Events: commit
- ƒ **handleGenerateMessage** (`Function`)
- 📝 Generates a commit message using LLM.
- ƒ **loadStatus** (`Function`)
- 📝 Loads the current repository status and diff.
- ƒ **handleCommit** (`Function`)
@@ -728,6 +747,41 @@
- 📝 Pushes local commits to the remote repository.
- ƒ **handlePull** (`Function`)
- 📝 Pulls changes from the remote repository.
- 🧩 **DocPreview** (`Component`)
- 📝 UI component for previewing generated dataset documentation before saving.
- 🏗️ Layer: UI
- 📥 Props: documentation: any, onSave: any, onCancel: any
- ➡️ WRITES_TO `t`
- ⬅️ READS_FROM `t`
- 📦 **DocPreview** (`Module`) `[TRIVIAL]`
- 📝 Auto-generated module for frontend/src/components/llm/DocPreview.svelte
- 🏗️ Layer: Unknown
- ƒ **handleSave** (`Function`) `[TRIVIAL]`
- 📝 Auto-detected function (orphan)
- 🧩 **ProviderConfig** (`Component`)
- 📝 UI form for managing LLM provider configurations.
- 🏗️ Layer: UI
- 📥 Props: providers: any, onSave: any
- ➡️ WRITES_TO `t`
- ⬅️ READS_FROM `t`
- 📦 **ProviderConfig** (`Module`) `[TRIVIAL]`
- 📝 Auto-generated module for frontend/src/components/llm/ProviderConfig.svelte
- 🏗️ Layer: Unknown
- ƒ **resetForm** (`Function`) `[TRIVIAL]`
- 📝 Auto-detected function (orphan)
- ƒ **handleEdit** (`Function`) `[TRIVIAL]`
- 📝 Auto-detected function (orphan)
- ƒ **testConnection** (`Function`) `[TRIVIAL]`
- 📝 Auto-detected function (orphan)
- ƒ **handleSubmit** (`Function`) `[TRIVIAL]`
- 📝 Auto-detected function (orphan)
- ƒ **toggleActive** (`Function`) `[TRIVIAL]`
- 📝 Auto-detected function (orphan)
- 📦 **ValidationReport** (`Module`) `[TRIVIAL]`
- 📝 Auto-generated module for frontend/src/components/llm/ValidationReport.svelte
- 🏗️ Layer: Unknown
- ƒ **getStatusColor** (`Function`) `[TRIVIAL]`
- 📝 Auto-detected function (orphan)
- 📦 **backend.delete_running_tasks** (`Module`)
- 📝 Script to delete tasks with RUNNING status from the database.
- 🏗️ Layer: Utility
@@ -752,6 +806,8 @@
- 📝 Serves frontend static files or index.html for SPA routing.
- ƒ **read_root** (`Function`)
- 📝 A simple root endpoint to confirm that the API is running when frontend is missing.
- ƒ **network_error_handler** (`Function`) `[TRIVIAL]`
- 📝 Auto-detected function (orphan)
- 📦 **Dependencies** (`Module`)
- 📝 Manages the creation and provision of shared application dependencies, such as the PluginLoader and TaskManager, to avoid circular imports.
- 🏗️ Layer: Core
@@ -941,6 +997,8 @@
- 🏗️ Layer: Core
- 🔒 Invariant: A single engine instance is used for the entire application.
- 🔗 DEPENDS_ON -> `sqlalchemy`
- 📦 **BASE_DIR** (`Variable`)
- 📝 Base directory for the backend (where .db files should reside).
- 📦 **DATABASE_URL** (`Constant`)
- 📝 URL for the main mappings database.
- 📦 **TASKS_DATABASE_URL** (`Constant`)
@@ -1328,6 +1386,20 @@
- 📝 Initiates the ADFS OIDC login flow.
- ƒ **auth_callback_adfs** (`Function`)
- 📝 Handles the callback from ADFS after successful authentication.
- 📦 **router** (`Global`)
- 📝 APIRouter instance for LLM routes.
- ƒ **get_providers** (`Function`)
- 📝 Retrieve all LLM provider configurations.
- ƒ **create_provider** (`Function`)
- 📝 Create a new LLM provider configuration.
- ƒ **update_provider** (`Function`)
- 📝 Update an existing LLM provider configuration.
- ƒ **delete_provider** (`Function`)
- 📝 Delete an LLM provider configuration.
- ƒ **test_connection** (`Function`)
- 📝 Test connection to an LLM provider.
- ƒ **test_provider_config** (`Function`)
- 📝 Test connection with a provided configuration (not yet saved).
- 📦 **backend.src.api.routes.git** (`Module`)
- 📝 Provides FastAPI endpoints for Git integration operations.
- 🏗️ Layer: API
@@ -1366,6 +1438,8 @@
- 📝 Get current Git status for a dashboard repository.
- ƒ **get_repository_diff** (`Function`)
- 📝 Get Git diff for a dashboard repository.
- ƒ **generate_commit_message** (`Function`)
- 📝 Generate a suggested commit message using LLM.
- 📦 **ConnectionsRouter** (`Module`)
- 📝 Defines the FastAPI router for managing external database connections.
- 🏗️ Layer: UI (API)
@@ -1546,6 +1620,15 @@
- 📝 Resume a task that is awaiting input (e.g., passwords).
- ƒ **clear_tasks** (`Function`)
- 📝 Clear tasks matching the status filter.
- 📦 **backend.src.models.llm** (`Module`)
- 📝 SQLAlchemy models for LLM provider configuration and validation results.
- 🏗️ Layer: Domain
- **LLMProvider** (`Class`)
- 📝 SQLAlchemy model for LLM provider configuration.
- **ValidationRecord** (`Class`)
- 📝 SQLAlchemy model for dashboard validation history.
- ƒ **generate_uuid** (`Function`) `[TRIVIAL]`
- 📝 Auto-detected function (orphan)
- 📦 **GitModels** (`Module`)
- 📝 Git-specific SQLAlchemy models for configuration and repository tracking.
- 🏗️ Layer: Model
@@ -1608,6 +1691,36 @@
- **ADGroupMapping** (`Class`)
- 📝 Maps an Active Directory group to a local System Role.
- 🔗 DEPENDS_ON -> `Role`
- 📦 **backend.src.services.llm_provider** (`Module`)
- 📝 Service for managing LLM provider configurations with encrypted API keys.
- 🏗️ Layer: Domain
- 🔗 DEPENDS_ON -> `backend.src.core.database`
- 🔗 DEPENDS_ON -> `backend.src.models.llm`
- **EncryptionManager** (`Class`)
- 📝 Handles encryption and decryption of sensitive data like API keys.
- 🔒 Invariant: Uses a secret key from the environment or a default one (fallback only for dev).
- **LLMProviderService** (`Class`)
- 📝 Service to manage LLM provider lifecycle.
- ƒ **get_all_providers** (`Function`)
- 📝 Returns all configured LLM providers.
- ƒ **get_provider** (`Function`)
- 📝 Returns a single LLM provider by ID.
- ƒ **create_provider** (`Function`)
- 📝 Creates a new LLM provider with encrypted API key.
- ƒ **update_provider** (`Function`)
- 📝 Updates an existing LLM provider.
- ƒ **delete_provider** (`Function`)
- 📝 Deletes an LLM provider.
- ƒ **get_decrypted_api_key** (`Function`)
- 📝 Returns the decrypted API key for a provider.
- ƒ **__init__** (`Function`) `[TRIVIAL]`
- 📝 Auto-detected function (orphan)
- ƒ **encrypt** (`Function`) `[TRIVIAL]`
- 📝 Auto-detected function (orphan)
- ƒ **decrypt** (`Function`) `[TRIVIAL]`
- 📝 Auto-detected function (orphan)
- ƒ **__init__** (`Function`) `[TRIVIAL]`
- 📝 Auto-detected function (orphan)
- 📦 **backend.src.services.auth_service** (`Module`)
- 📝 Orchestrates authentication business logic.
- 🏗️ Layer: Service
@@ -1814,6 +1927,72 @@
- 📝 Executes the dashboard migration logic.
- 📦 **MigrationPlugin.execute** (`Action`)
- 📝 Execute the migration logic with proper task logging.
- ƒ **schedule_dashboard_validation** (`Function`)
- 📝 Schedules a recurring dashboard validation task.
- 📦 **scheduler** (`Module`) `[TRIVIAL]`
- 📝 Auto-generated module for backend/src/plugins/llm_analysis/scheduler.py
- 🏗️ Layer: Unknown
- ƒ **job_func** (`Function`) `[TRIVIAL]`
- 📝 Auto-detected function (orphan)
- ƒ **_parse_cron** (`Function`) `[TRIVIAL]`
- 📝 Auto-detected function (orphan)
- **LLMProviderType** (`Class`)
- 📝 Enum for supported LLM providers.
- **LLMProviderConfig** (`Class`)
- 📝 Configuration for an LLM provider.
- **ValidationStatus** (`Class`)
- 📝 Enum for dashboard validation status.
- **DetectedIssue** (`Class`)
- 📝 Model for a single issue detected during validation.
- **ValidationResult** (`Class`)
- 📝 Model for dashboard validation result.
- 📦 **backend.src.plugins.llm_analysis.plugin** (`Module`)
- 📝 Implements DashboardValidationPlugin and DocumentationPlugin.
- 🏗️ Layer: Domain
- **DashboardValidationPlugin** (`Class`)
- 📝 Plugin for automated dashboard health analysis using LLMs.
- **DocumentationPlugin** (`Class`)
- 📝 Plugin for automated dataset documentation using LLMs.
- ƒ **id** (`Function`) `[TRIVIAL]`
- 📝 Auto-detected function (orphan)
- ƒ **name** (`Function`) `[TRIVIAL]`
- 📝 Auto-detected function (orphan)
- ƒ **description** (`Function`) `[TRIVIAL]`
- 📝 Auto-detected function (orphan)
- ƒ **version** (`Function`) `[TRIVIAL]`
- 📝 Auto-detected function (orphan)
- ƒ **get_schema** (`Function`) `[TRIVIAL]`
- 📝 Auto-detected function (orphan)
- ƒ **execute** (`Function`) `[TRIVIAL]`
- 📝 Auto-detected function (orphan)
- ƒ **id** (`Function`) `[TRIVIAL]`
- 📝 Auto-detected function (orphan)
- ƒ **name** (`Function`) `[TRIVIAL]`
- 📝 Auto-detected function (orphan)
- ƒ **description** (`Function`) `[TRIVIAL]`
- 📝 Auto-detected function (orphan)
- ƒ **version** (`Function`) `[TRIVIAL]`
- 📝 Auto-detected function (orphan)
- ƒ **get_schema** (`Function`) `[TRIVIAL]`
- 📝 Auto-detected function (orphan)
- ƒ **execute** (`Function`) `[TRIVIAL]`
- 📝 Auto-detected function (orphan)
- 📦 **backend.src.plugins.llm_analysis.service** (`Module`)
- 📝 Services for LLM interaction and dashboard screenshots.
- 🏗️ Layer: Domain
- 🔗 DEPENDS_ON -> `playwright`
- 🔗 DEPENDS_ON -> `openai`
- 🔗 DEPENDS_ON -> `tenacity`
- **ScreenshotService** (`Class`)
- 📝 Handles capturing screenshots of Superset dashboards.
- ƒ **capture_dashboard** (`Function`)
- 📝 Captures a screenshot of a dashboard using Playwright.
- **LLMClient** (`Class`)
- 📝 Wrapper for LLM provider APIs.
- ƒ **__init__** (`Function`) `[TRIVIAL]`
- 📝 Auto-detected function (orphan)
- ƒ **__init__** (`Function`) `[TRIVIAL]`
- 📝 Auto-detected function (orphan)
- 📦 **StoragePlugin** (`Module`)
- 📝 Provides core filesystem operations for managing backups and repositories.
- 🏗️ Layer: App
@@ -1854,6 +2033,15 @@
- 📝 Deletes a file or directory from the specified category and path.
- ƒ **get_file_path** (`Function`)
- 📝 Returns the absolute path of a file for download.
- **GitLLMExtension** (`Class`)
- 📝 Provides LLM capabilities to the Git plugin.
- ƒ **suggest_commit_message** (`Function`)
- 📝 Generates a suggested commit message based on a diff and history.
- 📦 **llm_extension** (`Module`) `[TRIVIAL]`
- 📝 Auto-generated module for backend/src/plugins/git/llm_extension.py
- 🏗️ Layer: Unknown
- ƒ **__init__** (`Function`) `[TRIVIAL]`
- 📝 Auto-detected function (orphan)
- ƒ **test_environment_model** (`Function`)
- 📝 Tests that Environment model correctly stores values.
- ƒ **test_belief_scope_logs_entry_action_exit** (`Function`)