Files
ss-tools/generate_semantic_map.py
2026-01-28 16:57:19 +03:00

1001 lines
41 KiB
Python
Raw Permalink Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# [DEF:generate_semantic_map:Module]
#
# @TIER: CRITICAL
# @SEMANTICS: semantic_analysis, parser, map_generator, compliance_checker, tier_validation, svelte_props, data_flow
# @PURPOSE: Scans the codebase to generate a Semantic Map and Compliance Report based on the System Standard.
# @LAYER: DevOps/Tooling
# @INVARIANT: All DEF anchors must have matching closing anchors; TIER determines validation strictness.
# @RELATION: READS -> FileSystem
# @RELATION: PRODUCES -> semantics/semantic_map.json
# @RELATION: PRODUCES -> specs/project_map.md
# @RELATION: PRODUCES -> semantics/reports/semantic_report_*.md
# [SECTION: IMPORTS]
import os
import re
import json
import datetime
import fnmatch
from enum import Enum
from dataclasses import dataclass, field
from typing import Dict, List, Optional, Any, Pattern, Tuple, Set
# Mock belief_scope for the script itself to avoid import issues
class belief_scope:
    """No-op stand-in for the project's belief_scope context manager."""

    # [DEF:__init__:Function]
    # @TIER: TRIVIAL
    # @PURPOSE: Mock init for self-containment.
    # @PRE: name is a string.
    # @POST: Instance initialized.
    def __init__(self, name):
        self.name = name
    # [/DEF:__init__:Function]

    # [DEF:__enter__:Function]
    # @TIER: TRIVIAL
    # @PURPOSE: Mock enter.
    # @PRE: Instance initialized.
    # @POST: Returns self.
    def __enter__(self):
        return self
    # [/DEF:__enter__:Function]

    # [DEF:__exit__:Function]
    # @TIER: TRIVIAL
    # @PURPOSE: Mock exit.
    # @PRE: Context entered.
    # @POST: Context exited.
    def __exit__(self, *exc_info):
        # Implicitly returns None: exceptions are never suppressed.
        pass
    # [/DEF:__exit__:Function]
# [/SECTION]
# [SECTION: CONFIGURATION]
class Tier(Enum):
    # [DEF:Tier:Class]
    # @TIER: TRIVIAL
    # @PURPOSE: Enumeration of semantic tiers defining validation strictness.
    """Semantic tier of an entity; drives how strictly it is validated."""
    CRITICAL = "CRITICAL"  # strictest: missing requirements become ERRORs
    STANDARD = "STANDARD"  # default tier when no @TIER tag is present
    TRIVIAL = "TRIVIAL"    # relaxed: minimal tagging required
    # [/DEF:Tier:Class]
class Severity(Enum):
    # [DEF:Severity:Class]
    # @TIER: TRIVIAL
    # @PURPOSE: Severity levels for compliance issues.
    """How serious a compliance finding is, from blocking to informational."""
    ERROR = "ERROR"      # blocks compliance (valid=False)
    WARNING = "WARNING"  # reduces score, does not block
    INFO = "INFO"        # informational only
    # [/DEF:Severity:Class]
# Root of the scan; the script is expected to run from the repository root.
PROJECT_ROOT = "."
# Directory names skipped anywhere in the tree (in addition to .gitignore).
IGNORE_DIRS = {
    ".git", "__pycache__", "node_modules", "venv", ".pytest_cache",
    ".kilocode", "backups", "logs", "semantics", "specs", ".venv"
}
# Individual file names skipped regardless of location.
IGNORE_FILES = {
    "package-lock.json", "poetry.lock", "yarn.lock"
}
# Output artifact locations (relative to the working directory).
OUTPUT_JSON = "semantics/semantic_map.json"
OUTPUT_COMPRESSED_MD = "specs/project_map.md"
REPORTS_DIR = "semantics/reports"
# Tier-based mandatory tags
# Maps Tier -> entity type -> list of tag names that MUST be present.
TIER_MANDATORY_TAGS = {
    Tier.CRITICAL: {
        "Module": ["PURPOSE", "LAYER", "SEMANTICS", "TIER", "INVARIANT"],
        "Component": ["PURPOSE", "LAYER", "SEMANTICS", "TIER", "INVARIANT"],
        "Function": ["PURPOSE", "PRE", "POST"],
        "Class": ["PURPOSE", "TIER"]
    },
    Tier.STANDARD: {
        "Module": ["PURPOSE", "LAYER", "SEMANTICS", "TIER"],
        "Component": ["PURPOSE", "LAYER", "SEMANTICS", "TIER"],
        "Function": ["PURPOSE", "PRE", "POST"],
        "Class": ["PURPOSE", "TIER"]
    },
    Tier.TRIVIAL: {
        "Module": ["PURPOSE", "TIER"],
        "Component": ["PURPOSE", "TIER"],
        "Function": ["PURPOSE"],
        "Class": ["PURPOSE", "TIER"]
    }
}
# Tier-based belief state requirements
# Whether functions of a given tier must contain belief-state logging.
TIER_BELIEF_REQUIRED = {
    Tier.CRITICAL: True,
    Tier.STANDARD: True,
    Tier.TRIVIAL: False
}
# [/SECTION]
# [DEF:ComplianceIssue:Class]
# @TIER: TRIVIAL
# @PURPOSE: Represents a single compliance issue with severity.
@dataclass
class ComplianceIssue:
    """A single compliance finding attached to an entity or a file."""
    message: str                       # human-readable description
    severity: Severity                 # ERROR / WARNING / INFO
    line_number: Optional[int] = None  # 1-based line, when known

    def to_dict(self) -> Dict[str, Any]:
        """Serialize the issue for JSON output."""
        payload = {
            "message": self.message,
            "severity": self.severity.value,
            "line_number": self.line_number,
        }
        return payload
# [/DEF:ComplianceIssue:Class]
# [DEF:SemanticEntity:Class]
# @TIER: CRITICAL
# @PURPOSE: Represents a code entity (Module, Function, Component) found during parsing.
# @INVARIANT: start_line is always set; end_line is set upon closure; tier defaults to STANDARD.
class SemanticEntity:
    # [DEF:__init__:Function]
    # @TIER: STANDARD
    # @PURPOSE: Initializes a new SemanticEntity instance.
    # @PRE: name, type_, start_line, file_path are provided.
    # @POST: Instance is initialized with default values.
    def __init__(self, name: str, type_: str, start_line: int, file_path: str):
        with belief_scope("__init__"):
            self.name = name
            self.type = type_
            self.start_line = start_line
            # Set by the parser when the matching closing anchor is found;
            # remaining None marks the entity as unclosed (a hard error).
            self.end_line: Optional[int] = None
            self.file_path = file_path
            # @TAG_NAME -> value pairs collected from the comment block.
            self.tags: Dict[str, str] = {}
            self.relations: List[Dict[str, str]] = []
            self.children: List['SemanticEntity'] = []
            self.parent: Optional['SemanticEntity'] = None
            self.compliance_issues: List[ComplianceIssue] = []
            # Flags flipped by the parser when belief-state logging is seen.
            self.has_belief_scope: bool = False
            self.has_console_log: bool = False
            # New fields for enhanced Svelte analysis
            self.props: List[Dict[str, Any]] = []
            self.events: List[str] = []
            self.data_flow: List[Dict[str, str]] = []
    # [/DEF:__init__:Function]
    # [DEF:get_tier:Function]
    # @TIER: STANDARD
    # @PURPOSE: Returns the tier of the entity, defaulting to STANDARD.
    # @PRE: tags dictionary is accessible.
    # @POST: Returns Tier enum value.
    def get_tier(self) -> Tier:
        with belief_scope("get_tier"):
            tier_str = self.tags.get("TIER", "STANDARD").upper()
            try:
                return Tier(tier_str)
            except ValueError:
                # Unknown tier strings silently fall back to STANDARD.
                return Tier.STANDARD
    # [/DEF:get_tier:Function]
    # [DEF:to_dict:Function]
    # @TIER: STANDARD
    # @PURPOSE: Serializes the entity to a dictionary for JSON output.
    # @PRE: Entity is fully populated.
    # @POST: Returns a dictionary representation.
    def to_dict(self) -> Dict[str, Any]:
        with belief_scope("to_dict"):
            result = {
                "name": self.name,
                "type": self.type,
                "tier": self.get_tier().value,
                "start_line": self.start_line,
                "end_line": self.end_line,
                "tags": self.tags,
                "relations": self.relations,
                "children": [c.to_dict() for c in self.children],
                "compliance": {
                    # "valid" means zero ERROR-severity issues; WARNINGs allowed.
                    "valid": len([i for i in self.compliance_issues if i.severity == Severity.ERROR]) == 0,
                    "issues": [i.to_dict() for i in self.compliance_issues],
                    "score": self.get_score()
                }
            }
            # Svelte-specific fields are emitted only when non-empty.
            if self.props:
                result["props"] = self.props
            if self.events:
                result["events"] = self.events
            if self.data_flow:
                result["data_flow"] = self.data_flow
            return result
    # [/DEF:to_dict:Function]
    # [DEF:validate:Function]
    # @TIER: CRITICAL
    # @PURPOSE: Checks for semantic compliance based on TIER requirements.
    # @PRE: Entity structure is complete; tier is determined.
    # @POST: Populates self.compliance_issues with severity levels.
    # @SIDE_EFFECT: Modifies self.compliance_issues list.
    def validate(self):
        with belief_scope("validate"):
            tier = self.get_tier()
            # 1. Check Closure (required for ALL tiers)
            if self.end_line is None:
                self.compliance_issues.append(ComplianceIssue(
                    f"Unclosed Anchor: [DEF:{self.name}:{self.type}] started at line {self.start_line}",
                    Severity.ERROR,
                    self.start_line
                ))
            # 2. Check Mandatory Tags based on TIER
            required = TIER_MANDATORY_TAGS.get(tier, {}).get(self.type, [])
            for req_tag in required:
                # Tag matching is case-insensitive on the stored tag name.
                found = False
                for existing_tag in self.tags:
                    if existing_tag.upper() == req_tag:
                        found = True
                        break
                if not found:
                    severity = Severity.ERROR if tier == Tier.CRITICAL else Severity.WARNING
                    self.compliance_issues.append(ComplianceIssue(
                        f"Missing Mandatory Tag: @{req_tag} (required for {tier.value} tier)",
                        severity,
                        self.start_line
                    ))
            # 3. Check for Belief State Logging based on TIER
            if self.type == "Function":
                belief_required = TIER_BELIEF_REQUIRED.get(tier, False)
                if belief_required:
                    # Python uses belief_scope; Svelte/JS uses tagged console.log.
                    is_python = self.file_path.endswith(".py")
                    has_belief = self.has_belief_scope if is_python else self.has_console_log
                    if not has_belief:
                        # Check if it's a special case (logger.py or mock functions)
                        # NOTE(review): "__" in the name exempts dunders, but also
                        # any name containing a double underscore — confirm intent.
                        if "logger.py" not in self.file_path and "__" not in self.name:
                            severity = Severity.ERROR if tier == Tier.CRITICAL else Severity.WARNING
                            log_type = "belief_scope" if is_python else "console.log with [ID][STATE]"
                            self.compliance_issues.append(ComplianceIssue(
                                f"Missing Belief State Logging: Function should use {log_type} (required for {tier.value} tier)",
                                severity,
                                self.start_line
                            ))
            # 4. Check for @INVARIANT in CRITICAL tier
            # NOTE(review): for CRITICAL Module/Component, INVARIANT is also in
            # TIER_MANDATORY_TAGS, so a missing @INVARIANT is reported twice
            # (and penalized twice in get_score) — confirm this is intended.
            if tier == Tier.CRITICAL and self.type in ["Module", "Component", "Class"]:
                if "INVARIANT" not in [k.upper() for k in self.tags.keys()]:
                    self.compliance_issues.append(ComplianceIssue(
                        f"Missing @INVARIANT tag (required for CRITICAL tier)",
                        Severity.ERROR,
                        self.start_line
                    ))
            # Recursive validation
            for child in self.children:
                child.validate()
    # [/DEF:validate:Function]
    # [DEF:get_score:Function]
    # @TIER: STANDARD
    # @PURPOSE: Calculates a compliance score (0.0 to 1.0) based on tier requirements.
    # @PRE: validate() has been called.
    # @POST: Returns a float score.
    def get_score(self) -> float:
        with belief_scope("get_score"):
            # An unclosed anchor voids the score entirely.
            if self.end_line is None:
                return 0.0
            tier = self.get_tier()
            score = 1.0
            # Count issues by severity
            errors = len([i for i in self.compliance_issues if i.severity == Severity.ERROR])
            warnings = len([i for i in self.compliance_issues if i.severity == Severity.WARNING])
            # Penalties
            score -= errors * 0.3
            score -= warnings * 0.1
            # Check mandatory tags: an additional penalty proportional to the
            # fraction of required tags that are missing.
            required = TIER_MANDATORY_TAGS.get(tier, {}).get(self.type, [])
            if required:
                found_count = 0
                for req_tag in required:
                    for existing_tag in self.tags:
                        if existing_tag.upper() == req_tag:
                            found_count += 1
                            break
                if found_count < len(required):
                    score -= 0.2 * (1 - (found_count / len(required)))
            # Score is clamped below at 0.0 (never negative).
            return max(0.0, score)
    # [/DEF:get_score:Function]
# [/DEF:SemanticEntity:Class]
# [DEF:get_patterns:Function]
# @TIER: STANDARD
# @PURPOSE: Returns regex patterns for a specific language.
# @PRE: lang is either 'python' or 'svelte_js'.
# @POST: Returns a dictionary of compiled regex patterns.
# @PARAM: lang (str) - 'python' or 'svelte_js'
def get_patterns(lang: str) -> Dict[str, Pattern]:
    with belief_scope("get_patterns"):
        if lang != "python":
            # Svelte/JS/TS: anchors can appear in HTML comments or JS comments.
            return {
                "html_anchor_start": re.compile(r"<!--\s*\[DEF:(?P<name>[\w\.]+):(?P<type>\w+)\]\s*-->"),
                "html_anchor_end": re.compile(r"<!--\s*\[/DEF:(?P<name>[\w\.]+)(?::\w+)?\]\s*-->"),
                "js_anchor_start": re.compile(r"//\s*\[DEF:(?P<name>[\w\.]+):(?P<type>\w+)\]"),
                "js_anchor_end": re.compile(r"//\s*\[/DEF:(?P<name>[\w\.]+)(?::\w+)?\]"),
                "html_tag": re.compile(r"@(?P<tag>[A-Z_]+):\s*(?P<value>.*)"),
                "jsdoc_tag": re.compile(r"\*\s*@(?P<tag>[a-zA-Z]+)\s+(?P<value>.*)"),
                "relation": re.compile(r"//\s*@RELATION:\s*(?P<type>\w+)\s*->\s*(?P<target>.*)"),
                "func_def": re.compile(r"^\s*(export\s+)?(async\s+)?function\s+(?P<name>\w+)"),
                "console_log": re.compile(r"console\.log\s*\(\s*['\"]\[[\w_]+\]\[[\w_]+\]"),
                # Svelte-specific patterns
                "export_let": re.compile(r"export\s+let\s+(?P<name>\w+)(?:\s*:\s*(?P<type>[\w\[\]|<>]+))?(?:\s*=\s*(?P<default>[^;]+))?"),
                "create_event_dispatcher": re.compile(r"createEventDispatcher\s*<\s*\{\s*(?P<events>[^}]+)\s*\}\s*\>"),
                "dispatch_call": re.compile(r"dispatch\s*\(\s*['\"](?P<event>\w+)['\"]"),
                "store_subscription": re.compile(r"\$(?P<store>\w+)"),
                "store_import": re.compile(r"import\s*\{[^}]*\b(?P<store>\w+Store|store)\b[^}]*\}\s*from\s*['\"][^'\"]*stores?[^'\"]*['\"]"),
            }
        # Python: anchors, tags and relations all live in '#' comments.
        return {
            "anchor_start": re.compile(r"#\s*\[DEF:(?P<name>[\w\.]+):(?P<type>\w+)\]"),
            "anchor_end": re.compile(r"#\s*\[/DEF:(?P<name>[\w\.]+)(?::\w+)?\]"),
            "tag": re.compile(r"#\s*@(?P<tag>[A-Z_]+):\s*(?P<value>.*)"),
            "relation": re.compile(r"#\s*@RELATION:\s*(?P<type>\w+)\s*->\s*(?P<target>.*)"),
            "func_def": re.compile(r"^\s*(async\s+)?def\s+(?P<name>\w+)"),
            "belief_scope": re.compile(r"with\s+(\w+\.)?belief_scope\("),
        }
# [/DEF:get_patterns:Function]
# [DEF:extract_svelte_props:Function]
# @TIER: STANDARD
# @PURPOSE: Extracts props from Svelte component script section.
# @PRE: lines is a list of file lines, start_idx is the starting line index.
# @POST: Returns list of prop definitions.
def extract_svelte_props(lines: List[str], start_idx: int) -> List[Dict[str, Any]]:
    with belief_scope("extract_svelte_props"):
        prop_re = re.compile(r"export\s+let\s+(?P<name>\w+)(?:\s*:\s*(?P<type>[\w\[\]|<>\s]+))?(?:\s*=\s*(?P<default>[^;]+))?;")
        found: List[Dict[str, Any]] = []
        # Scan a bounded window (at most 100 lines) after the anchor.
        for raw in lines[start_idx:start_idx + 100]:
            text = raw.strip()
            # Stop scanning once the script section ends or code begins.
            if text == "</script>" or text.startswith(("function ", "const ")):
                break
            m = prop_re.search(text)
            if m is None:
                continue
            default_src = m.group("default")
            found.append({
                "name": m.group("name"),
                "type": m.group("type") or "any",
                "default": default_src.strip() if default_src else None,
            })
        return found
# [/DEF:extract_svelte_props:Function]
# [DEF:extract_svelte_events:Function]
# @TIER: STANDARD
# @PURPOSE: Extracts dispatched events from Svelte component.
# @PRE: lines is a list of file lines.
# @POST: Returns list of event names.
def extract_svelte_events(lines: List[str]) -> List[str]:
    with belief_scope("extract_svelte_events"):
        # Pattern 1: typed dispatcher, e.g. createEventDispatcher<{ submit: T; cancel: T }>
        typed_re = re.compile(r"createEventDispatcher\s*<\s*\{\s*([^}]+)\s*\}\s*\>")
        # Pattern 2: runtime call, e.g. dispatch('eventName')
        call_re = re.compile(r"dispatch\s*\(\s*['\"](\w+)['\"]")
        found: Set[str] = set()
        for raw in lines:
            text = raw.strip()
            typed = typed_re.search(text)
            if typed:
                # Each "name: Type" declaration inside the braces yields an event.
                for decl in typed.group(1).split(";"):
                    name, sep, _rest = decl.partition(":")
                    if sep:
                        name = name.strip()
                        if name:
                            found.add(name)
            called = call_re.search(text)
            if called:
                found.add(called.group(1))
        return sorted(found)
# [/DEF:extract_svelte_events:Function]
# [DEF:extract_data_flow:Function]
# @TIER: STANDARD
# @PURPOSE: Extracts store subscriptions and data flow from Svelte component.
# @PRE: lines is a list of file lines.
# @POST: Returns list of data flow descriptors ({store, type, line}).
def extract_data_flow(lines: List[str]) -> List[Dict[str, str]]:
    with belief_scope("extract_data_flow"):
        data_flow = []
        # Pattern for store subscriptions: $storeName
        subscription_pattern = re.compile(r"\$(?P<store>\w+)")
        # Pattern for store imports
        import_pattern = re.compile(r"import\s*\{[^}]*\}\s*from\s*['\"][^'\"]*stores?[^'\"]*['\"]")
        # NOTE(review): store_names is collected but not yet used for filtering
        # subscriptions — kept for forward compatibility.
        store_names = set()
        # First pass: find store imports
        for line in lines:
            if import_pattern.search(line):
                # Extract imported names
                match = re.search(r"import\s*\{([^}]+)\}", line)
                if match:
                    for imp in match.group(1).split(","):
                        name = imp.strip()
                        # Guard against empty entries (e.g. a trailing comma).
                        if name:
                            store_names.add(name.split()[0])
        # Second pass: find subscriptions
        for i, line in enumerate(lines):
            line_stripped = line.strip()
            # Skip comments
            if line_stripped.startswith("//") or line_stripped.startswith("*"):
                continue
            # Find store subscriptions
            for match in subscription_pattern.finditer(line):
                store_name = match.group("store")
                if store_name not in ["if", "while", "for", "switch"]:
                    flow_type = "READS_FROM"
                    # BUGFIX: in Svelte a store WRITE puts the subscription on
                    # the LEFT of the assignment ($store = value). The previous
                    # check ($ appearing AFTER =) classified reads as writes
                    # and writes as reads.
                    eq_pos = line.find("=")
                    if eq_pos != -1 and match.start() < eq_pos:
                        prev_ch = line[eq_pos - 1] if eq_pos > 0 else ""
                        next_ch = line[eq_pos + 1] if eq_pos + 1 < len(line) else ""
                        # Skip comparisons (==, !=, <=, >=) and arrows (=>);
                        # augmented assignments (+=, -=, ...) still count as writes.
                        if next_ch not in ("=", ">") and prev_ch not in ("!", "<", ">"):
                            flow_type = "WRITES_TO"
                    data_flow.append({
                        "store": store_name,
                        "type": flow_type,
                        "line": i + 1
                    })
        return data_flow
# [/DEF:extract_data_flow:Function]
# [DEF:parse_file:Function]
# @TIER: CRITICAL
# @PURPOSE: Parses a single file to extract semantic entities with tier awareness and enhanced Svelte analysis.
# @PRE: full_path, rel_path, lang are valid strings.
# @POST: Returns extracted entities and list of issues.
# @INVARIANT: Every opened anchor must have a matching closing anchor for valid compliance.
# @PARAM: full_path - Absolute path to file.
# @PARAM: rel_path - Relative path from project root.
# @PARAM: lang - Language identifier.
def parse_file(full_path: str, rel_path: str, lang: str) -> Tuple[List[SemanticEntity], List[ComplianceIssue]]:
    with belief_scope("parse_file"):
        issues: List[ComplianceIssue] = []
        try:
            with open(full_path, 'r', encoding='utf-8') as f:
                lines = f.readlines()
        except Exception as e:
            # Unreadable files are reported as a single ERROR, not a crash.
            return [], [ComplianceIssue(f"Could not read file {rel_path}: {e}", Severity.ERROR)]
        # Stack of currently-open anchors; top of stack is the innermost entity.
        stack: List[SemanticEntity] = []
        file_entities: List[SemanticEntity] = []
        orphan_functions: List[SemanticEntity] = []
        patterns = get_patterns(lang)
        # Track current module for grouping orphans
        current_module: Optional[SemanticEntity] = None
        for i, line in enumerate(lines):
            lineno = i + 1
            line_stripped = line.strip()
            # 1. Check for Anchor Start
            match_start = None
            if lang == "python":
                match_start = patterns["anchor_start"].search(line_stripped)
            else:
                # Svelte/JS anchors may be in HTML comments or '//' comments.
                match_start = patterns["html_anchor_start"].search(line_stripped) or patterns["js_anchor_start"].search(line_stripped)
            if match_start:
                name = match_start.group("name")
                type_ = match_start.group("type")
                entity = SemanticEntity(name, type_, lineno, rel_path)
                # Track module-level entities
                if type_ == "Module" and not stack:
                    current_module = entity
                if stack:
                    # Nested anchor: attach as a child of the open entity.
                    parent = stack[-1]
                    parent.children.append(entity)
                    entity.parent = parent
                else:
                    file_entities.append(entity)
                stack.append(entity)
                continue
            # 2. Check for Anchor End
            match_end = None
            if lang == "python":
                match_end = patterns["anchor_end"].search(line_stripped)
            else:
                match_end = patterns["html_anchor_end"].search(line_stripped) or patterns["js_anchor_end"].search(line_stripped)
            if match_end:
                name = match_end.group("name")
                if not stack:
                    issues.append(ComplianceIssue(
                        f"{rel_path}:{lineno} Found closing anchor [/DEF:{name}] without opening anchor.",
                        Severity.ERROR,
                        lineno
                    ))
                    continue
                top = stack[-1]
                if top.name == name:
                    # Matching close: record the end line and pop the scope.
                    top.end_line = lineno
                    stack.pop()
                else:
                    # Mismatch is reported but the stack is NOT popped.
                    issues.append(ComplianceIssue(
                        f"{rel_path}:{lineno} Mismatched closing anchor. Expected [/DEF:{top.name}], found [/DEF:{name}].",
                        Severity.ERROR,
                        lineno
                    ))
                continue
            # 3. Check for Naked Functions (Missing Contracts) - track as orphans
            if "func_def" in patterns:
                match_func = patterns["func_def"].search(line_stripped)
                if match_func:
                    func_name = match_func.group("name")
                    # Covered only when the innermost open anchor is a Function
                    # with the same name.
                    is_covered = False
                    if stack:
                        current = stack[-1]
                        if current.type == "Function" and current.name == func_name:
                            is_covered = True
                    if not is_covered:
                        # Create orphan function entity
                        orphan = SemanticEntity(func_name, "Function", lineno, rel_path)
                        orphan.tags["PURPOSE"] = f"Auto-detected function (orphan)"
                        orphan.tags["TIER"] = "TRIVIAL"
                        orphan.end_line = lineno  # Mark as closed immediately
                        orphan_functions.append(orphan)
            # 4. Check for Tags/Relations
            if stack:
                current = stack[-1]
                # Relations are checked before generic tags (@RELATION also
                # matches the generic tag pattern).
                match_rel = patterns["relation"].search(line_stripped)
                if match_rel:
                    current.relations.append({
                        "type": match_rel.group("type"),
                        "target": match_rel.group("target")
                    })
                    continue
                match_tag = None
                if lang == "python":
                    match_tag = patterns["tag"].search(line_stripped)
                elif lang == "svelte_js":
                    match_tag = patterns["html_tag"].search(line_stripped)
                    # Fall back to JSDoc-style tags inside block comments.
                    if not match_tag and ("/*" in line_stripped or "*" in line_stripped or "//" in line_stripped):
                        match_tag = patterns["jsdoc_tag"].search(line_stripped)
                if match_tag:
                    tag_name = match_tag.group("tag").upper()
                    tag_value = match_tag.group("value").strip()
                    current.tags[tag_name] = tag_value
                # Check for belief scope in implementation
                if lang == "python" and "belief_scope" in patterns:
                    if patterns["belief_scope"].search(line):
                        current.has_belief_scope = True
                # Check for console.log belief state in Svelte
                if lang == "svelte_js" and "console_log" in patterns:
                    if patterns["console_log"].search(line):
                        current.has_console_log = True
        # End of file check: anything still on the stack was never closed.
        if stack:
            for unclosed in stack:
                issues.append(ComplianceIssue(
                    f"{rel_path}: Unclosed Anchor [DEF:{unclosed.name}:{unclosed.type}] at end of file (started line {unclosed.start_line})",
                    Severity.ERROR,
                    unclosed.start_line
                ))
                # Top-level unclosed entities are still included in the results.
                if unclosed.parent is None and unclosed not in file_entities:
                    file_entities.append(unclosed)
        # Post-processing for Svelte files
        if lang == "svelte_js":
            for entity in file_entities:
                if entity.type == "Component":
                    # Extract props, events, and data flow
                    # (start_line is 1-based, so scanning begins on the line
                    # AFTER the component anchor.)
                    entity.props = extract_svelte_props(lines, entity.start_line)
                    entity.events = extract_svelte_events(lines)
                    entity.data_flow = extract_data_flow(lines)
        # Group orphan functions under their module
        if orphan_functions:
            if current_module:
                # Add orphans as children of the module
                for orphan in orphan_functions:
                    orphan.parent = current_module
                    current_module.children.append(orphan)
            else:
                # Create a synthetic module for orphans
                synthetic_module = SemanticEntity(
                    os.path.splitext(os.path.basename(rel_path))[0],
                    "Module",
                    1,
                    rel_path
                )
                synthetic_module.tags["PURPOSE"] = f"Auto-generated module for {rel_path}"
                synthetic_module.tags["TIER"] = "TRIVIAL"
                synthetic_module.tags["LAYER"] = "Unknown"
                synthetic_module.end_line = len(lines)
                for orphan in orphan_functions:
                    orphan.parent = synthetic_module
                    synthetic_module.children.append(orphan)
                file_entities.append(synthetic_module)
        return file_entities, issues
# [/DEF:parse_file:Function]
# [DEF:SemanticMapGenerator:Class]
# @TIER: CRITICAL
# @PURPOSE: Orchestrates the mapping process with tier-based validation.
# @INVARIANT: All entities are validated according to their TIER requirements.
class SemanticMapGenerator:
    # [DEF:__init__:Function]
    # @TIER: STANDARD
    # @PURPOSE: Initializes the generator with a root directory.
    # @PRE: root_dir is a valid path string.
    # @POST: Generator instance is ready.
    def __init__(self, root_dir: str):
        with belief_scope("__init__"):
            self.root_dir = root_dir
            self.entities: List[SemanticEntity] = []
            self.file_scores: Dict[str, float] = {}
            self.global_issues: List[ComplianceIssue] = []
            self.ignored_patterns = self._load_gitignore()
    # [/DEF:__init__:Function]
    # [DEF:_load_gitignore:Function]
    # @TIER: STANDARD
    # @PURPOSE: Loads patterns from .gitignore file.
    # @PRE: .gitignore exists in root_dir.
    # @POST: Returns set of ignore patterns.
    def _load_gitignore(self) -> Set[str]:
        with belief_scope("_load_gitignore"):
            patterns = set()
            ignore_file = os.path.join(self.root_dir, ".gitignore")
            if os.path.exists(ignore_file):
                with open(ignore_file, 'r') as f:
                    for line in f:
                        line = line.strip()
                        # Skip blank lines and comments.
                        if line and not line.startswith("#"):
                            patterns.add(line)
            return patterns
    # [/DEF:_load_gitignore:Function]
    # [DEF:_is_ignored:Function]
    # @TIER: STANDARD
    # @PURPOSE: Checks if a path should be ignored based on .gitignore or hardcoded defaults.
    # @PRE: rel_path is a valid relative path string.
    # @POST: Returns True if the path should be ignored.
    def _is_ignored(self, rel_path: str) -> bool:
        with belief_scope("_is_ignored"):
            # Normalize to forward slashes for pattern matching.
            rel_path = rel_path.replace(os.sep, '/')
            parts = rel_path.split('/')
            # Hardcoded directory names apply anywhere in the path.
            for part in parts:
                if part in IGNORE_DIRS:
                    return True
            if os.path.basename(rel_path) in IGNORE_FILES:
                return True
            for pattern in self.ignored_patterns:
                if pattern.endswith('/'):
                    # Directory pattern: match the directory itself or anything under it.
                    dir_pattern = pattern.rstrip('/')
                    if rel_path == dir_pattern or rel_path.startswith(pattern):
                        return True
                # Also try the pattern relative to the frontend/backend roots,
                # since .gitignore entries may be written from those subtrees.
                if rel_path.startswith("frontend/") and fnmatch.fnmatch(rel_path[9:], pattern):
                    return True
                if rel_path.startswith("backend/") and fnmatch.fnmatch(rel_path[8:], pattern):
                    return True
                if fnmatch.fnmatch(rel_path, pattern) or \
                   fnmatch.fnmatch(os.path.basename(rel_path), pattern) or \
                   any(fnmatch.fnmatch(part, pattern) for part in parts):
                    return True
            return False
    # [/DEF:_is_ignored:Function]
    # [DEF:run:Function]
    # @TIER: CRITICAL
    # @PURPOSE: Main execution flow.
    # @PRE: Generator is initialized.
    # @POST: Semantic map and reports are generated.
    # @RELATION: CALLS -> _walk_and_parse
    # @RELATION: CALLS -> _generate_artifacts
    def run(self):
        with belief_scope("run"):
            print(f"Starting Semantic Map Generation in {self.root_dir}...")
            self._walk_and_parse()
            self._generate_artifacts()
            print("Done.")
    # [/DEF:run:Function]
    # [DEF:_walk_and_parse:Function]
    # @TIER: CRITICAL
    # @PURPOSE: Recursively walks directories and triggers parsing.
    # @PRE: root_dir exists.
    # @POST: All files are scanned and entities extracted.
    def _walk_and_parse(self):
        with belief_scope("_walk_and_parse"):
            for root, dirs, files in os.walk(self.root_dir):
                # Prune ignored directories in place so os.walk skips them.
                dirs[:] = [d for d in dirs if not self._is_ignored(os.path.relpath(os.path.join(root, d), self.root_dir) + "/")]
                for file in files:
                    file_path = os.path.join(root, file)
                    rel_path = os.path.relpath(file_path, self.root_dir)
                    if self._is_ignored(rel_path):
                        continue
                    # Map file extension to a parser language.
                    lang = None
                    if file.endswith(".py"):
                        lang = "python"
                    elif file.endswith((".svelte", ".js", ".ts")):
                        lang = "svelte_js"
                    if lang:
                        entities, issues = parse_file(file_path, rel_path, lang)
                        self.global_issues.extend(issues)
                        if entities:
                            self._process_file_results(rel_path, entities)
    # [/DEF:_walk_and_parse:Function]
    # [DEF:_process_file_results:Function]
    # @TIER: STANDARD
    # @PURPOSE: Validates entities and calculates file scores with tier awareness.
    # @PRE: Entities have been parsed from the file.
    # @POST: File score is calculated and issues collected.
    def _process_file_results(self, rel_path: str, entities: List[SemanticEntity]):
        with belief_scope("_process_file_results"):
            total_score = 0
            count = 0
            # [DEF:validate_recursive:Function]
            # @TIER: STANDARD
            # @PURPOSE: Recursively validates a list of entities.
            # @PRE: ent_list is a list of SemanticEntity objects.
            # @POST: All entities and their children are validated.
            def validate_recursive(ent_list):
                with belief_scope("validate_recursive"):
                    nonlocal total_score, count
                    for e in ent_list:
                        e.validate()
                        total_score += e.get_score()
                        count += 1
                        validate_recursive(e.children)
            # [/DEF:validate_recursive:Function]
            validate_recursive(entities)
            self.entities.extend(entities)
            # File score is the mean entity score (0.0 for empty files).
            self.file_scores[rel_path] = (total_score / count) if count > 0 else 0.0
    # [/DEF:_process_file_results:Function]
    # [DEF:_generate_artifacts:Function]
    # @TIER: CRITICAL
    # @PURPOSE: Writes output files with tier-based compliance data.
    # @PRE: Parsing and validation are complete.
    # @POST: JSON and Markdown artifacts are written to disk.
    def _generate_artifacts(self):
        with belief_scope("_generate_artifacts"):
            full_map = {
                "project_root": self.root_dir,
                "generated_at": datetime.datetime.now().isoformat(),
                "modules": [e.to_dict() for e in self.entities]
            }
            os.makedirs(os.path.dirname(OUTPUT_JSON), exist_ok=True)
            with open(OUTPUT_JSON, 'w', encoding='utf-8') as f:
                json.dump(full_map, f, indent=2)
            print(f"Generated {OUTPUT_JSON}")
            self._generate_report()
            self._generate_compressed_map()
    # [/DEF:_generate_artifacts:Function]
    # [DEF:_generate_report:Function]
    # @TIER: CRITICAL
    # @PURPOSE: Generates the Markdown compliance report with severity levels.
    # @PRE: File scores and issues are available.
    # @POST: Markdown report is created in reports directory.
    def _generate_report(self):
        with belief_scope("_generate_report"):
            timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
            report_path = os.path.join(REPORTS_DIR, f"semantic_report_{timestamp}.md")
            os.makedirs(REPORTS_DIR, exist_ok=True)
            total_files = len(self.file_scores)
            avg_score = sum(self.file_scores.values()) / total_files if total_files > 0 else 0
            # Count issues by severity
            error_count = len([i for i in self.global_issues if i.severity == Severity.ERROR])
            warning_count = len([i for i in self.global_issues if i.severity == Severity.WARNING])
            with open(report_path, 'w', encoding='utf-8') as f:
                f.write(f"# Semantic Compliance Report\n\n")
                f.write(f"**Generated At:** {datetime.datetime.now().isoformat()}\n")
                f.write(f"**Global Compliance Score:** {avg_score:.1%}\n")
                f.write(f"**Scanned Files:** {total_files}\n")
                f.write(f"**Global Errors:** {error_count} | **Warnings:** {warning_count}\n\n")
                if self.global_issues:
                    f.write("## Critical Parsing Errors\n")
                    for issue in self.global_issues:
                        icon = "🔴" if issue.severity == Severity.ERROR else "🟡" if issue.severity == Severity.WARNING else ""
                        f.write(f"- {icon} {issue.message}\n")
                    f.write("\n")
                f.write("## File Compliance Status\n")
                f.write("| File | Score | Tier | Issues |\n")
                f.write("|------|-------|------|--------|\n")
                # Worst-scoring files first.
                sorted_files = sorted(self.file_scores.items(), key=lambda x: x[1])
                for file_path, score in sorted_files:
                    issues = []
                    # BUGFIX: tier was previously passed into _collect_issues and
                    # reassigned there, but str is immutable so the caller always
                    # printed "N/A". The helper now RETURNS the resolved tier.
                    tier = self._collect_issues(self.entities, file_path, issues, "N/A")
                    status_icon = "🟢" if score == 1.0 else "🟡" if score > 0.5 else "🔴"
                    issue_text = "<br>".join([f"{'🔴' if i.severity == Severity.ERROR else '🟡'} {i.message}" for i in issues[:3]])
                    if len(issues) > 3:
                        issue_text += f"<br>... and {len(issues) - 3} more"
                    if not issues:
                        issue_text = "OK"
                    f.write(f"| {file_path} | {status_icon} {score:.0%} | {tier} | {issue_text} |\n")
            print(f"Generated {report_path}")
    # [/DEF:_generate_report:Function]
    # [DEF:_collect_issues:Function]
    # @TIER: STANDARD
    # @PURPOSE: Helper to collect issues for a specific file from the entity tree.
    # @PRE: entities list and file_path are valid.
    # @POST: issues list is populated; returns the tier of the last matching entity, or the incoming default when none match.
    def _collect_issues(self, entities: List[SemanticEntity], file_path: str, issues: List[ComplianceIssue], tier: str = "N/A") -> str:
        with belief_scope("_collect_issues"):
            for e in entities:
                if e.file_path == file_path:
                    issues.extend(e.compliance_issues)
                    tier = e.get_tier().value
                # Thread the tier through the recursion so the caller sees it.
                tier = self._collect_issues(e.children, file_path, issues, tier)
            return tier
    # [/DEF:_collect_issues:Function]
    # [DEF:_generate_compressed_map:Function]
    # @TIER: CRITICAL
    # @PURPOSE: Generates the token-optimized project map with enhanced Svelte details.
    # @PRE: Entities have been processed.
    # @POST: Markdown project map is written.
    def _generate_compressed_map(self):
        with belief_scope("_generate_compressed_map"):
            os.makedirs(os.path.dirname(OUTPUT_COMPRESSED_MD), exist_ok=True)
            with open(OUTPUT_COMPRESSED_MD, 'w', encoding='utf-8') as f:
                f.write("# Project Semantic Map\n\n")
                f.write("> Compressed view for AI Context. Generated automatically.\n\n")
                for entity in self.entities:
                    self._write_entity_md(f, entity, level=0)
            print(f"Generated {OUTPUT_COMPRESSED_MD}")
    # [/DEF:_generate_compressed_map:Function]
    # [DEF:_write_entity_md:Function]
    # @TIER: CRITICAL
    # @PURPOSE: Recursive helper to write entity tree to Markdown with tier badges and enhanced details.
    # @PRE: f is an open file handle, entity is valid.
    # @POST: Entity details are written to the file.
    def _write_entity_md(self, f, entity: SemanticEntity, level: int):
        with belief_scope("_write_entity_md"):
            indent = "  " * level
            icon = "📦"
            if entity.type == "Component": icon = "🧩"
            elif entity.type == "Function": icon = "ƒ"
            # NOTE(review): the Class icon is an empty string — the original
            # glyph appears to have been lost; confirm intended icon.
            elif entity.type == "Class": icon = ""
            elif entity.type == "Store": icon = "🗄️"
            tier_badge = ""
            tier = entity.get_tier()
            if tier == Tier.CRITICAL:
                tier_badge = " `[CRITICAL]`"
            elif tier == Tier.TRIVIAL:
                tier_badge = " `[TRIVIAL]`"
            f.write(f"{indent}- {icon} **{entity.name}** (`{entity.type}`){tier_badge}\n")
            # Tags may have been captured in either case depending on the source.
            purpose = entity.tags.get("PURPOSE") or entity.tags.get("purpose")
            layer = entity.tags.get("LAYER") or entity.tags.get("layer")
            invariant = entity.tags.get("INVARIANT")
            if purpose:
                f.write(f"{indent}  - 📝 {purpose}\n")
            if layer:
                f.write(f"{indent}  - 🏗️ Layer: {layer}\n")
            if invariant:
                f.write(f"{indent}  - 🔒 Invariant: {invariant}\n")
            # Write Props for Components (cap at 5 to keep the map compact)
            if entity.props:
                props_str = ", ".join([f"{p['name']}: {p['type']}" for p in entity.props[:5]])
                if len(entity.props) > 5:
                    props_str += f"... (+{len(entity.props) - 5})"
                f.write(f"{indent}  - 📥 Props: {props_str}\n")
            # Write Events for Components
            if entity.events:
                events_str = ", ".join(entity.events[:5])
                if len(entity.events) > 5:
                    events_str += f"... (+{len(entity.events) - 5})"
                f.write(f"{indent}  - ⚡ Events: {events_str}\n")
            # Write Data Flow (deduplicated by type->store, capped at 3)
            if entity.data_flow:
                unique_flows = {}
                for flow in entity.data_flow:
                    key = f"{flow['type']} -> {flow['store']}"
                    unique_flows[key] = flow
                for flow_key, flow in list(unique_flows.items())[:3]:
                    arrow = "⬅️" if flow['type'] == "READS_FROM" else "➡️"
                    f.write(f"{indent}  - {arrow} {flow['type']} `{flow['store']}`\n")
            # Write Relations (only the structurally interesting kinds)
            for rel in entity.relations:
                if rel['type'] in ['DEPENDS_ON', 'CALLS', 'INHERITS', 'IMPLEMENTS', 'DISPATCHES']:
                    f.write(f"{indent}  - 🔗 {rel['type']} -> `{rel['target']}`\n")
            # Limit nesting depth to keep the compressed map readable.
            if level < 3:
                for child in entity.children:
                    self._write_entity_md(f, child, level + 1)
    # [/DEF:_write_entity_md:Function]
# [/DEF:SemanticMapGenerator:Class]
# Script entry point: scan the tree rooted at PROJECT_ROOT and emit the
# semantic map, compliance report, and compressed project map.
if __name__ == "__main__":
    generator = SemanticMapGenerator(PROJECT_ROOT)
    generator.run()
# [/DEF:generate_semantic_map:Module]