# ss-tools/backend/src/core/utils/matching.py

# [DEF:backend.src.core.utils.matching:Module]
#
# @SEMANTICS: fuzzy, matching, rapidfuzz, database, mapping
# @PURPOSE:   Provides utility functions for fuzzy matching database names.
# @LAYER:     Core
# @RELATION:  DEPENDS_ON -> rapidfuzz
#
# @INVARIANT: Confidence scores are returned as floats between 0.0 and 1.0.

# [SECTION: IMPORTS]
from rapidfuzz import fuzz, process
from typing import List, Dict
# [/SECTION]

# [DEF:suggest_mappings:Function]
# @PURPOSE: Suggests mappings between source and target databases using fuzzy matching.
# @PRE:     source_databases and target_databases are lists of dictionaries with 'uuid' and 'database_name'.
# @POST:    Returns a list of suggested mappings with confidence scores.
# @PARAM:   source_databases (List[Dict]) - Databases from the source environment.
# @PARAM:   target_databases (List[Dict]) - Databases from the target environment.
# @PARAM:   threshold (int) - Minimum confidence score (0-100).
# @RETURN:  List[Dict] - Suggested mappings.
def suggest_mappings(source_databases: List[Dict], target_databases: List[Dict], threshold: int = 60) -> List[Dict]:
    """
    Suggest source-to-target database mappings using fuzzy name matching.

    Each source database name is compared against all target database names
    with rapidfuzz's ``token_sort_ratio`` scorer. Only the single best-scoring
    target is considered per source, and it is suggested only when its score
    is at least ``threshold``.

    Args:
        source_databases: Dicts read for their 'uuid' and 'database_name' keys.
        target_databases: Dicts read for their 'uuid' and 'database_name' keys.
        threshold: Minimum match score, on the scorer's 0-100 scale.

    Returns:
        One dict per matched source database with the keys 'source_db_uuid',
        'target_db_uuid' and 'confidence' (the match score divided by 100).
        Source databases without a match at or above ``threshold`` are
        omitted. An empty list is returned when either input is empty or None.
    """
    # Nothing can be paired if either side is empty or missing. Checking both
    # inputs keeps None handling symmetric instead of failing on the loop.
    if not source_databases or not target_databases:
        return []

    # Positions in this list line up with target_databases, so the index
    # reported by extractOne can be used to look up the matched target.
    target_names = [db['database_name'] for db in target_databases]

    suggestions = []
    for s_db in source_databases:
        # Use token_sort_ratio as decided in research.md.
        # score_cutoff makes rapidfuzz drop candidates below the threshold
        # itself; extractOne then returns None when no target qualifies.
        match = process.extractOne(
            s_db['database_name'],
            target_names,
            scorer=fuzz.token_sort_ratio,
            score_cutoff=threshold,
        )
        if match is None:
            continue

        _matched_name, score, index = match
        suggestions.append({
            "source_db_uuid": s_db['uuid'],
            "target_db_uuid": target_databases[index]['uuid'],
            # Rescale the 0-100 score to the 0.0-1.0 confidence range.
            "confidence": score / 100.0,
        })

    return suggestions
# [/DEF:suggest_mappings:Function]

# [/DEF:backend.src.core.utils.matching:Module]