# [DEF:backend.src.core.utils.matching:Module]
#
# @SEMANTICS: fuzzy, matching, rapidfuzz, database, mapping
# @PURPOSE: Provides utility functions for fuzzy matching database names.
# @LAYER: Core
# @RELATION: DEPENDS_ON -> rapidfuzz
#
# @INVARIANT: Confidence scores are returned as floats between 0.0 and 1.0.

# [SECTION: IMPORTS]
from rapidfuzz import fuzz, process
from typing import List, Dict
# [/SECTION]


# [DEF:suggest_mappings:Function]
# @PURPOSE: Suggests mappings between source and target databases using fuzzy matching.
# @PRE: source_databases and target_databases are lists of dictionaries with 'uuid' and 'database_name'.
# @POST: Returns a list of suggested mappings with confidence scores.
# @PARAM: source_databases (List[Dict]) - Databases from the source environment.
# @PARAM: target_databases (List[Dict]) - Databases from the target environment.
# @PARAM: threshold (int) - Minimum confidence score (0-100).
# @RETURN: List[Dict] - Suggested mappings.
def suggest_mappings(source_databases: List[Dict], target_databases: List[Dict], threshold: int = 60) -> List[Dict]:
    """
    Suggest mappings between source and target databases using fuzzy matching.

    Every source database name is compared against all target database names
    and paired with the single best-scoring target, provided that score is at
    least ``threshold``. Targets are not consumed once matched, so several
    sources may be paired with the same target.

    Args:
        source_databases: Databases from the source environment; each entry
            is read via its 'database_name' and 'uuid' keys.
        target_databases: Databases from the target environment; each entry
            is read via its 'database_name' and 'uuid' keys.
        threshold: Minimum match score on the 0-100 scale for a pair to be
            suggested.

    Returns:
        One dict per matched source database, in source order, with keys
        'source_db_uuid', 'target_db_uuid' and 'confidence' (the match score
        divided by 100). Empty when there are no targets or no match reaches
        the threshold.

    Raises:
        KeyError: If an entry lacks 'database_name', or a matched entry
            lacks 'uuid'.
    """
    suggestions: List[Dict] = []
    if not target_databases:
        # Nothing to match against.
        return suggestions

    # Positions in target_names line up with target_databases, so the index
    # reported by extractOne can be used to look up the matched target.
    target_names = [db['database_name'] for db in target_databases]

    for s_db in source_databases:
        # Use token_sort_ratio as decided in research.md.
        # score_cutoff makes extractOne return None when the best candidate
        # scores below the threshold, so no separate score check is needed.
        # NOTE(review): no `processor` is passed; depending on the installed
        # rapidfuzz version the comparison may be case- and
        # punctuation-sensitive - confirm this is intended for database names.
        match = process.extractOne(
            s_db['database_name'],
            target_names,
            scorer=fuzz.token_sort_ratio,
            score_cutoff=threshold,
        )
        if match is None:
            continue

        _, score, index = match
        suggestions.append({
            "source_db_uuid": s_db['uuid'],
            "target_db_uuid": target_databases[index]['uuid'],
            # Scale the 0-100 scorer result to the module's 0.0-1.0 invariant.
            "confidence": score / 100.0
        })

    return suggestions
# [/DEF:suggest_mappings:Function]

# [/DEF:backend.src.core.utils.matching:Module]