ready for test

This commit is contained in:
2026-02-25 13:35:09 +03:00
parent 21e969a769
commit 33433c3173
11 changed files with 994 additions and 351 deletions

View File

@@ -0,0 +1,195 @@
# [DEF:backend.src.core.mapping_service:Module]
#
# @TIER: CRITICAL
# @SEMANTICS: mapping, ids, synchronization, environments, cross-filters
# @PURPOSE: Service for tracking and synchronizing Superset Resource IDs (UUID <-> Integer ID)
# @LAYER: Core
# @RELATION: DEPENDS_ON -> backend.src.models.mapping (ResourceMapping, ResourceType)
# @RELATION: DEPENDS_ON -> backend.src.core.logger
# @TEST_DATA: mock_superset_resources -> {'chart': [{'id': 42, 'uuid': '1234', 'slice_name': 'test'}], 'dataset': [{'id': 99, 'uuid': '5678', 'table_name': 'data'}]}
#
# @INVARIANT: sync_environment must handle remote API failures gracefully.
# [SECTION: IMPORTS]
from typing import Dict, List, Optional
from datetime import datetime, timezone
from sqlalchemy.orm import Session
from apscheduler.schedulers.background import BackgroundScheduler
from apscheduler.triggers.cron import CronTrigger
from src.models.mapping import ResourceMapping, ResourceType
from src.core.logger import logger, belief_scope
# [/SECTION]
# [DEF:IdMappingService:Class]
# @TIER: CRITICAL
# @PURPOSE: Service handling the cataloging and retrieval of remote Superset Integer IDs.
class IdMappingService:
# [DEF:__init__:Function]
# @PURPOSE: Initializes the mapping service.
def __init__(self, db_session: Session):
self.db = db_session
self.scheduler = BackgroundScheduler()
self._sync_job = None
# [/DEF:__init__:Function]
# [DEF:start_scheduler:Function]
# @PURPOSE: Starts the background scheduler with a given cron string.
# @PARAM: cron_string (str) - Cron expression for the sync interval.
# @PARAM: environments (List[str]) - List of environment IDs to sync.
# @PARAM: superset_client_factory - Function to get a client for an environment.
def start_scheduler(self, cron_string: str, environments: List[str], superset_client_factory):
with belief_scope("IdMappingService.start_scheduler"):
if self._sync_job:
self.scheduler.remove_job(self._sync_job.id)
logger.info("[IdMappingService.start_scheduler][Reflect] Removed existing sync job.")
def sync_all():
for env_id in environments:
client = superset_client_factory(env_id)
if client:
self.sync_environment(env_id, client)
self._sync_job = self.scheduler.add_job(
sync_all,
CronTrigger.from_crontab(cron_string),
id='id_mapping_sync_job',
replace_existing=True
)
if not self.scheduler.running:
self.scheduler.start()
logger.info(f"[IdMappingService.start_scheduler][Coherence:OK] Started background scheduler with cron: {cron_string}")
else:
logger.info(f"[IdMappingService.start_scheduler][Coherence:OK] Updated background scheduler with cron: {cron_string}")
# [/DEF:start_scheduler:Function]
# [DEF:sync_environment:Function]
# @PURPOSE: Fully synchronizes mapping for a specific environment.
# @PARAM: environment_id (str) - Target environment ID.
# @PARAM: superset_client - Instance capable of hitting the Superset API.
# @PRE: environment_id exists in the database.
# @POST: ResourceMapping records for the environment are created or updated.
def sync_environment(self, environment_id: str, superset_client) -> None:
"""
Polls the Superset APIs for the target environment and updates the local mapping table.
"""
with belief_scope("IdMappingService.sync_environment"):
logger.info(f"[IdMappingService.sync_environment][Action] Starting sync for environment {environment_id}")
# Implementation Note: In a real scenario, superset_client needs to be an instance
# capable of auth & iteration over /api/v1/chart/, /api/v1/dataset/, /api/v1/dashboard/
# Here we structure the logic according to the spec.
types_to_poll = [
(ResourceType.CHART, "chart", "slice_name"),
(ResourceType.DATASET, "dataset", "table_name"),
(ResourceType.DASHBOARD, "dashboard", "slug") # Note: dashboard slug or dashboard_title
]
total_synced = 0
try:
for res_enum, endpoint, name_field in types_to_poll:
logger.debug(f"[IdMappingService.sync_environment][Explore] Polling {endpoint} endpoint")
# Simulated API Fetch (Would be: superset_client.get(f"/api/v1/{endpoint}/")... )
# This relies on the superset API structure, e.g. { "result": [{"id": 1, "uuid": "...", name_field: "..."}] }
# We assume superset_client provides a generic method to fetch all pages.
try:
resources = superset_client.get_all_resources(endpoint)
for res in resources:
res_uuid = res.get("uuid")
res_id = str(res.get("id")) # Store as string
res_name = res.get(name_field)
if not res_uuid or not res_id:
continue
# Upsert Logic
mapping = self.db.query(ResourceMapping).filter_by(
environment_id=environment_id,
resource_type=res_enum,
uuid=res_uuid
).first()
if mapping:
mapping.remote_integer_id = res_id
mapping.resource_name = res_name
mapping.last_synced_at = datetime.now(timezone.utc)
else:
new_mapping = ResourceMapping(
environment_id=environment_id,
resource_type=res_enum,
uuid=res_uuid,
remote_integer_id=res_id,
resource_name=res_name,
last_synced_at=datetime.now(timezone.utc)
)
self.db.add(new_mapping)
total_synced += 1
except Exception as loop_e:
logger.error(f"[IdMappingService.sync_environment][Reason] Error polling {endpoint}: {loop_e}")
# Continue to next resource type instead of blowing up the whole sync
self.db.commit()
logger.info(f"[IdMappingService.sync_environment][Coherence:OK] Successfully synced {total_synced} items.")
except Exception as e:
self.db.rollback()
logger.error(f"[IdMappingService.sync_environment][Coherence:Failed] Critical sync failure: {e}")
raise
# [/DEF:sync_environment:Function]
# [DEF:get_remote_id:Function]
# @PURPOSE: Retrieves the remote integer ID for a given universal UUID.
# @PARAM: environment_id (str)
# @PARAM: resource_type (ResourceType)
# @PARAM: uuid (str)
# @RETURN: Optional[int]
def get_remote_id(self, environment_id: str, resource_type: ResourceType, uuid: str) -> Optional[int]:
mapping = self.db.query(ResourceMapping).filter_by(
environment_id=environment_id,
resource_type=resource_type,
uuid=uuid
).first()
if mapping:
try:
return int(mapping.remote_integer_id)
except ValueError:
return None
return None
# [/DEF:get_remote_id:Function]
# [DEF:get_remote_ids_batch:Function]
# @PURPOSE: Retrieves remote integer IDs for a list of universal UUIDs efficiently.
# @PARAM: environment_id (str)
# @PARAM: resource_type (ResourceType)
# @PARAM: uuids (List[str])
# @RETURN: Dict[str, int] - Mapping of UUID -> Integer ID
def get_remote_ids_batch(self, environment_id: str, resource_type: ResourceType, uuids: List[str]) -> Dict[str, int]:
if not uuids:
return {}
mappings = self.db.query(ResourceMapping).filter(
ResourceMapping.environment_id == environment_id,
ResourceMapping.resource_type == resource_type,
ResourceMapping.uuid.in_(uuids)
).all()
result = {}
for m in mappings:
try:
result[m.uuid] = int(m.remote_integer_id)
except ValueError:
pass
return result
# [/DEF:get_remote_ids_batch:Function]
# [/DEF:IdMappingService:Class]
# [/DEF:backend.src.core.mapping_service:Module]