# [DEF:backend.src.core.mapping_service:Module]
#
# @TIER: CRITICAL
# @SEMANTICS: mapping, ids, synchronization, environments, cross-filters
# @PURPOSE: Service for tracking and synchronizing Superset Resource IDs (UUID <-> Integer ID)
# @LAYER: Core
# @RELATION: DEPENDS_ON -> backend.src.models.mapping (ResourceMapping, ResourceType)
# @RELATION: DEPENDS_ON -> backend.src.core.logger
# @TEST_DATA: mock_superset_resources -> {'chart': [{'id': 42, 'uuid': '1234', 'slice_name': 'test'}], 'dataset': [{'id': 99, 'uuid': '5678', 'table_name': 'data'}]}
#
# @INVARIANT: sync_environment must handle remote API failures gracefully.

# [SECTION: IMPORTS]
# FIX: `timedelta` and `func` were previously imported inside the sync loop;
# hoisted here per PEP 8 (imports at top of file).
from typing import Dict, List, Optional, Tuple
from datetime import datetime, timedelta, timezone

from sqlalchemy.orm import Session
from sqlalchemy.sql import func
from apscheduler.schedulers.background import BackgroundScheduler
from apscheduler.triggers.cron import CronTrigger

from src.models.mapping import ResourceMapping, ResourceType
from src.core.logger import logger, belief_scope
# [/SECTION]


# [DEF:IdMappingService:Class]
# @TIER: CRITICAL
# @PURPOSE: Service handling the cataloging and retrieval of remote Superset Integer IDs.
class IdMappingService:

    # [DEF:__init__:Function]
    # @PURPOSE: Initializes the mapping service.
    # @PARAM: db_session (Session) - SQLAlchemy session used for all mapping reads/writes.
    def __init__(self, db_session: Session):
        self.db = db_session
        self.scheduler = BackgroundScheduler()
        # Handle of the currently registered sync job (None until start_scheduler runs).
        self._sync_job = None
    # [/DEF:__init__:Function]

    # [DEF:start_scheduler:Function]
    # @PURPOSE: Starts the background scheduler with a given cron string.
    # @PARAM: cron_string (str) - Cron expression for the sync interval.
    # @PARAM: environments (List[str]) - List of environment IDs to sync.
    # @PARAM: superset_client_factory - Function to get a client for an environment.
    def start_scheduler(self, cron_string: str, environments: List[str], superset_client_factory) -> None:
        with belief_scope("IdMappingService.start_scheduler"):

            def sync_all():
                # Sync every configured environment; a factory returning None
                # (environment unavailable) is skipped silently.
                for env_id in environments:
                    client = superset_client_factory(env_id)
                    if client:
                        self.sync_environment(env_id, client)

            # FIX: the previous manual remove_job() call was redundant —
            # replace_existing=True already swaps out a job with the same id.
            self._sync_job = self.scheduler.add_job(
                sync_all,
                CronTrigger.from_crontab(cron_string),
                id='id_mapping_sync_job',
                replace_existing=True
            )

            if not self.scheduler.running:
                self.scheduler.start()
                logger.info(f"[IdMappingService.start_scheduler][Coherence:OK] Started background scheduler with cron: {cron_string}")
            else:
                logger.info(f"[IdMappingService.start_scheduler][Coherence:OK] Updated background scheduler with cron: {cron_string}")
    # [/DEF:start_scheduler:Function]

    # [DEF:_sync_resource_type:Function]
    # @PURPOSE: Polls one resource type for one environment and upserts/deletes mappings.
    # @PARAM: environment_id (str) - Target environment ID.
    # @PARAM: superset_client - Instance capable of hitting the Superset API.
    # @PARAM: res_enum (ResourceType) - Resource type being synced.
    # @PARAM: endpoint (str) - Superset API endpoint segment (e.g. "chart").
    # @PARAM: name_field (str) - API field holding the human-readable name.
    # @PARAM: incremental (bool) - If True, only fetch items changed since the last sync.
    # @RETURN: Tuple[int, int] - (number of upserted mappings, number of deleted stale mappings).
    # NOTE: does NOT commit; the caller owns the transaction boundary.
    def _sync_resource_type(self, environment_id: str, superset_client,
                            res_enum: ResourceType, endpoint: str, name_field: str,
                            incremental: bool) -> Tuple[int, int]:
        since_dttm = None
        if incremental:
            max_date = self.db.query(func.max(ResourceMapping.last_synced_at)).filter(
                ResourceMapping.environment_id == environment_id,
                ResourceMapping.resource_type == res_enum
            ).scalar()
            if max_date:
                # Subtract a small overlap window so items updated concurrently
                # with the previous sync are not missed.
                since_dttm = max_date - timedelta(minutes=5)
                logger.debug(f"[IdMappingService.sync_environment] Incremental sync since {since_dttm}")

        # We assume superset_client provides a generic method to fetch all pages,
        # returning dicts shaped like {"id": 1, "uuid": "...", name_field: "..."}.
        resources = superset_client.get_all_resources(endpoint, since_dttm=since_dttm)

        # Track which UUIDs we see in this sync cycle (used for stale detection).
        synced_uuids = set()
        synced = 0

        for res in resources:
            res_uuid = res.get("uuid")
            raw_id = res.get("id")
            res_name = res.get(name_field)

            # Skip malformed entries — both identifiers are mandatory.
            if not res_uuid or raw_id is None:
                continue

            synced_uuids.add(res_uuid)
            res_id = str(raw_id)  # Store as string

            # Upsert Logic
            mapping = self.db.query(ResourceMapping).filter_by(
                environment_id=environment_id,
                resource_type=res_enum,
                uuid=res_uuid
            ).first()

            if mapping:
                mapping.remote_integer_id = res_id
                mapping.resource_name = res_name
                mapping.last_synced_at = datetime.now(timezone.utc)
            else:
                self.db.add(ResourceMapping(
                    environment_id=environment_id,
                    resource_type=res_enum,
                    uuid=res_uuid,
                    remote_integer_id=res_id,
                    resource_name=res_name,
                    last_synced_at=datetime.now(timezone.utc)
                ))
            synced += 1

        # Delete stale mappings: rows for this env+type whose UUID was NOT
        # returned by the API (resource was deleted remotely). Only done on
        # full syncs, because incremental syncs don't return all UUIDs.
        # NOTE(review): if the API legitimately returns zero resources, this
        # clears every mapping for the type — presumed intentional, but a
        # transient empty response would wipe the cache; confirm upstream.
        deleted = 0
        if not incremental:
            stale_query = self.db.query(ResourceMapping).filter(
                ResourceMapping.environment_id == environment_id,
                ResourceMapping.resource_type == res_enum,
            )
            if synced_uuids:
                stale_query = stale_query.filter(
                    ResourceMapping.uuid.notin_(synced_uuids)
                )
            deleted = stale_query.delete(synchronize_session="fetch")
            if deleted:
                logger.info(f"[IdMappingService.sync_environment][Action] Removed {deleted} stale {endpoint} mapping(s) for {environment_id}")

        return synced, deleted
    # [/DEF:_sync_resource_type:Function]

    # [DEF:sync_environment:Function]
    # @PURPOSE: Fully synchronizes mapping for a specific environment.
    # @PARAM: environment_id (str) - Target environment ID.
    # @PARAM: superset_client - Instance capable of hitting the Superset API.
    # @PARAM: incremental (bool) - If True, only fetch items changed since the last sync.
    # @PRE: environment_id exists in the database.
    # @POST: ResourceMapping records for the environment are created or updated.
    def sync_environment(self, environment_id: str, superset_client, incremental: bool = False) -> None:
        """
        Polls the Superset APIs for the target environment and updates the local
        mapping table. If incremental=True, only fetches items changed since the
        max last_synced_at date.

        Each resource type is committed atomically: a failure while polling one
        endpoint rolls back only that endpoint's partial changes and the sync
        continues with the next type (per the module @INVARIANT).
        """
        with belief_scope("IdMappingService.sync_environment"):
            logger.info(f"[IdMappingService.sync_environment][Action] Starting sync for environment {environment_id} (incremental={incremental})")

            # Implementation Note: In a real scenario, superset_client needs to be an
            # instance capable of auth & iteration over /api/v1/chart/,
            # /api/v1/dataset/, /api/v1/dashboard/.
            types_to_poll = [
                (ResourceType.CHART, "chart", "slice_name"),
                (ResourceType.DATASET, "dataset", "table_name"),
                (ResourceType.DASHBOARD, "dashboard", "slug")  # Note: dashboard slug or dashboard_title
            ]

            total_synced = 0
            total_deleted = 0

            try:
                for res_enum, endpoint, name_field in types_to_poll:
                    logger.debug(f"[IdMappingService.sync_environment][Explore] Polling {endpoint} endpoint")
                    try:
                        synced, deleted = self._sync_resource_type(
                            environment_id, superset_client,
                            res_enum, endpoint, name_field, incremental
                        )
                        # FIX: commit per resource type so a later endpoint's
                        # failure cannot discard (or half-commit) earlier work.
                        self.db.commit()
                        total_synced += synced
                        total_deleted += deleted
                    except Exception as loop_e:
                        # FIX: roll back the half-applied endpoint instead of
                        # leaving partial rows in the session to be committed
                        # later. Continue to the next resource type instead of
                        # blowing up the whole sync.
                        self.db.rollback()
                        logger.error(f"[IdMappingService.sync_environment][Reason] Error polling {endpoint}: {loop_e}")

                logger.info(f"[IdMappingService.sync_environment][Coherence:OK] Successfully synced {total_synced} items and deleted {total_deleted} stale items.")
            except Exception as e:
                # Unexpected failure outside the per-endpoint handler (e.g. commit error).
                self.db.rollback()
                logger.error(f"[IdMappingService.sync_environment][Coherence:Failed] Critical sync failure: {e}")
                raise
    # [/DEF:sync_environment:Function]

    # [DEF:get_remote_id:Function]
    # @PURPOSE: Retrieves the remote integer ID for a given universal UUID.
    # @PARAM: environment_id (str)
    # @PARAM: resource_type (ResourceType)
    # @PARAM: uuid (str)
    # @RETURN: Optional[int] - None when no mapping exists or the stored ID is not numeric.
    def get_remote_id(self, environment_id: str, resource_type: ResourceType, uuid: str) -> Optional[int]:
        mapping = self.db.query(ResourceMapping).filter_by(
            environment_id=environment_id,
            resource_type=resource_type,
            uuid=uuid
        ).first()
        if mapping:
            try:
                return int(mapping.remote_integer_id)
            except (ValueError, TypeError):
                # FIX: also guard TypeError in case a row written elsewhere
                # holds a NULL remote_integer_id.
                return None
        return None
    # [/DEF:get_remote_id:Function]

    # [DEF:get_remote_ids_batch:Function]
    # @PURPOSE: Retrieves remote integer IDs for a list of universal UUIDs efficiently.
    # @PARAM: environment_id (str)
    # @PARAM: resource_type (ResourceType)
    # @PARAM: uuids (List[str])
    # @RETURN: Dict[str, int] - Mapping of UUID -> Integer ID (non-numeric IDs omitted).
    def get_remote_ids_batch(self, environment_id: str, resource_type: ResourceType, uuids: List[str]) -> Dict[str, int]:
        if not uuids:
            return {}
        mappings = self.db.query(ResourceMapping).filter(
            ResourceMapping.environment_id == environment_id,
            ResourceMapping.resource_type == resource_type,
            ResourceMapping.uuid.in_(uuids)
        ).all()
        result = {}
        for m in mappings:
            try:
                result[m.uuid] = int(m.remote_integer_id)
            except (ValueError, TypeError):
                # Skip rows whose stored ID cannot be interpreted as an integer.
                pass
        return result
    # [/DEF:get_remote_ids_batch:Function]

# [/DEF:IdMappingService:Class]
# [/DEF:backend.src.core.mapping_service:Module]