53
backend/src/core/utils/matching.py
Normal file
53
backend/src/core/utils/matching.py
Normal file
@@ -0,0 +1,53 @@
|
||||
# [DEF:backend.src.core.utils.matching:Module]
|
||||
#
|
||||
# @SEMANTICS: fuzzy, matching, rapidfuzz, database, mapping
|
||||
# @PURPOSE: Provides utility functions for fuzzy matching database names.
|
||||
# @LAYER: Core
|
||||
# @RELATION: DEPENDS_ON -> rapidfuzz
|
||||
#
|
||||
# @INVARIANT: Confidence scores are returned as floats between 0.0 and 1.0.
|
||||
|
||||
# [SECTION: IMPORTS]
|
||||
from rapidfuzz import fuzz, process
|
||||
from typing import List, Dict
|
||||
# [/SECTION]
|
||||
|
||||
# [DEF:suggest_mappings:Function]
|
||||
# @PURPOSE: Suggests mappings between source and target databases using fuzzy matching.
|
||||
# @PRE: source_databases and target_databases are lists of dictionaries with 'uuid' and 'database_name'.
|
||||
# @POST: Returns a list of suggested mappings with confidence scores.
|
||||
# @PARAM: source_databases (List[Dict]) - Databases from the source environment.
|
||||
# @PARAM: target_databases (List[Dict]) - Databases from the target environment.
|
||||
# @PARAM: threshold (int) - Minimum confidence score (0-100).
|
||||
# @RETURN: List[Dict] - Suggested mappings.
|
||||
def suggest_mappings(source_databases: List[Dict], target_databases: List[Dict], threshold: int = 60) -> List[Dict]:
|
||||
"""
|
||||
Suggest mappings between source and target databases using fuzzy matching.
|
||||
"""
|
||||
suggestions = []
|
||||
if not target_databases:
|
||||
return suggestions
|
||||
|
||||
target_names = [db['database_name'] for db in target_databases]
|
||||
|
||||
for s_db in source_databases:
|
||||
# Use token_sort_ratio as decided in research.md
|
||||
match = process.extractOne(
|
||||
s_db['database_name'],
|
||||
target_names,
|
||||
scorer=fuzz.token_sort_ratio
|
||||
)
|
||||
|
||||
if match:
|
||||
name, score, index = match
|
||||
if score >= threshold:
|
||||
suggestions.append({
|
||||
"source_db_uuid": s_db['uuid'],
|
||||
"target_db_uuid": target_databases[index]['uuid'],
|
||||
"confidence": score / 100.0
|
||||
})
|
||||
|
||||
return suggestions
|
||||
# [/DEF:suggest_mappings]
|
||||
|
||||
# [/DEF:backend.src.core.utils.matching]
|
||||
Reference in New Issue
Block a user