chore: sync workflows, scripts, and configurations from MokoStandards

This commit is contained in:
Moko Standards Bot
2026-01-30 02:15:07 +00:00
parent 2436054ae6
commit 0f2c0c1166
16 changed files with 5064 additions and 28 deletions

View File

@@ -0,0 +1,533 @@
#!/usr/bin/env python3
"""Auto-Detect Repository Platform v03.00.00 - Critical Validator Infrastructure.
This script automatically detects repository platform types with confidence scoring
and provides JSON/CLI output for automation workflows.
Platform detection capabilities:
- Joomla/WaaS components (manifest patterns, version detection)
- Dolibarr/CRM modules (module.php, core/ structure)
- Generic repositories (fallback with confidence scoring)
Usage:
python3 auto_detect_platform.py [--repo-path PATH] [--json] [--verbose] [--cache]
Examples:
# Auto-detect current repository with JSON output
python3 auto_detect_platform.py --json
# Detect specific repository with caching
python3 auto_detect_platform.py --repo-path /path/to/repo --cache --verbose
# JSON output for CI/CD automation
python3 auto_detect_platform.py --json | jq '.platform_type'
Exit codes:
0: Success (platform detected successfully)
1: Detection failed (no platform could be determined)
2: Configuration error (invalid arguments or paths)
"""
import argparse
import hashlib
import json
import os
import pickle
import sys
import xml.etree.ElementTree as ET
from dataclasses import dataclass, asdict
from enum import Enum
from pathlib import Path
from typing import Dict, List, Optional, Tuple
# Version
__version__ = "03.00.00"
class PlatformType(Enum):
    """Supported repository platform classifications.

    The string values are stable identifiers emitted in JSON output
    (``DetectionResult.to_dict``), so they must not be renamed.
    """

    JOOMLA = "joomla"      # Joomla/WaaS extension repositories
    DOLIBARR = "dolibarr"  # Dolibarr/CRM module repositories
    GENERIC = "generic"    # fallback when no platform markers are found
@dataclass
class DetectionResult:
    """Platform detection result with confidence scoring.

    Attributes:
        platform_type: Detected platform type enum value.
        confidence: Confidence score from 0-100.
        indicators: Human-readable list of detection indicators found.
        metadata: Additional platform-specific metadata (string keys/values).
    """

    platform_type: PlatformType
    confidence: int
    indicators: List[str]
    metadata: Dict[str, str]

    def to_dict(self) -> Dict[str, object]:
        """Convert detection result to a JSON-serializable dictionary.

        Returns:
            Dictionary representation with platform_type flattened to its
            string value so ``json.dumps`` can serialize it directly.
        """
        # FIX: the original annotation was Dict[str, any], which refers to the
        # builtin any() function rather than typing.Any. The values here are of
        # mixed types (str, int, list, dict), so Dict[str, object] is accurate
        # and needs no extra import.
        return {
            "platform_type": self.platform_type.value,
            "confidence": self.confidence,
            "indicators": self.indicators,
            "metadata": self.metadata
        }
class DetectionCache:
    """File-based cache of detection results.

    Each result is pickled to a file named after the SHA256 hash of the
    repository path, so repeated runs against the same path can skip the
    filesystem scan entirely.
    """

    def __init__(self, cache_dir: Optional[Path] = None) -> None:
        """Create the cache, ensuring the backing directory exists.

        Args:
            cache_dir: Directory for cache files. Defaults to
                ~/.cache/mokostudios/platform_detection.
        """
        self.cache_dir = (
            Path.home() / ".cache" / "mokostudios" / "platform_detection"
            if cache_dir is None
            else cache_dir
        )
        self.cache_dir.mkdir(parents=True, exist_ok=True)

    def _get_cache_key(self, repo_path: Path) -> str:
        """Return the SHA256 hex digest of the repository path string."""
        return hashlib.sha256(str(repo_path).encode()).hexdigest()

    def get(self, repo_path: Path) -> Optional[DetectionResult]:
        """Return the cached result for *repo_path*, or None on a miss.

        Corrupt or unreadable cache entries are treated as misses rather
        than raised to the caller.
        """
        entry = self.cache_dir / f"{self._get_cache_key(repo_path)}.pkl"
        if not entry.exists():
            return None
        try:
            with open(entry, 'rb') as handle:
                return pickle.load(handle)
        except (pickle.PickleError, OSError, EOFError):
            # Unreadable entry: behave as if nothing was cached.
            return None

    def set(self, repo_path: Path, result: DetectionResult) -> None:
        """Persist *result* for *repo_path*.

        Caching is best-effort: write failures are ignored so detection
        still succeeds without a working cache.
        """
        entry = self.cache_dir / f"{self._get_cache_key(repo_path)}.pkl"
        try:
            with open(entry, 'wb') as handle:
                pickle.dump(result, handle)
        except (pickle.PickleError, OSError):
            pass

    def clear(self) -> None:
        """Delete every cached entry, ignoring files that cannot be removed."""
        for entry in self.cache_dir.glob("*.pkl"):
            try:
                entry.unlink()
            except OSError:
                pass
class PlatformDetector:
    """Detects repository platform type with enhanced detection algorithms.

    Runs the Joomla and Dolibarr detectors in turn and falls back to a
    generic result; a platform is accepted once its confidence reaches 50.
    """

    def __init__(self, repo_path: Path, use_cache: bool = False) -> None:
        """Initialize platform detector.

        Args:
            repo_path: Path to repository to analyze.
            use_cache: Enable caching for performance optimization.

        Raises:
            ValueError: If the resolved repository path does not exist.
        """
        self.repo_path = Path(repo_path).resolve()
        self.use_cache = use_cache
        self.cache = DetectionCache() if use_cache else None
        if not self.repo_path.exists():
            raise ValueError(f"Repository path does not exist: {self.repo_path}")

    def detect(self) -> DetectionResult:
        """Detect repository platform type.

        Executes platform-specific detection methods in order:
        1. Joomla detection (manifest patterns, directory structure)
        2. Dolibarr detection (module descriptors, core/ structure)
        3. Generic fallback (confidence-based scoring)

        Returns:
            DetectionResult with platform type and confidence score.
        """
        if self.use_cache and self.cache:
            cached_result = self.cache.get(self.repo_path)
            if cached_result:
                return cached_result
        # Detectors are called lazily so Dolibarr scanning is skipped
        # entirely when the Joomla detector already matched.
        for detector in (self._detect_joomla, self._detect_dolibarr):
            result = detector()
            if result.confidence >= 50:
                return self._store(result)
        return self._store(self._detect_generic())

    def _store(self, result: DetectionResult) -> DetectionResult:
        """Cache *result* when caching is enabled, then return it unchanged."""
        if self.use_cache and self.cache:
            self.cache.set(self.repo_path, result)
        return result

    def _detect_joomla(self) -> DetectionResult:
        """Detect Joomla component with enhanced manifest pattern matching.

        Detection criteria:
        - XML manifest files with <extension> or <install> root tags (+50)
        - Extension type attribute (component, module, plugin, etc.)
        - Joomla version tag in manifest (+10)
        - Directory structure site/, admin/, administrator/ (+15 each)
        - Language directory language/en-GB/ (+10), media assets (+5)

        Returns:
            DetectionResult for Joomla platform with confidence capped at 100.
        """
        confidence = 0
        indicators: List[str] = []
        metadata: Dict[str, str] = {}
        skip_dirs = {".git", "vendor", "node_modules", ".github"}
        for xml_file in self.repo_path.glob("**/*.xml"):
            if any(skip_dir in xml_file.parts for skip_dir in skip_dirs):
                continue
            try:
                root = ET.parse(xml_file).getroot()
            except (ET.ParseError, OSError):
                continue
            if root.tag not in ("extension", "install"):
                continue
            ext_type = root.get("type", "")
            if ext_type not in ("component", "module", "plugin", "library", "template", "file"):
                continue
            confidence += 50
            rel_path = xml_file.relative_to(self.repo_path)
            indicators.append(f"Joomla manifest: {rel_path} (type={ext_type})")
            metadata["manifest_file"] = str(rel_path)
            metadata["extension_type"] = ext_type
            version_elem = root.find("version")
            if version_elem is not None and version_elem.text:
                confidence += 10
                metadata["version"] = version_elem.text.strip()
                indicators.append(f"Joomla version tag: {version_elem.text.strip()}")
            name_elem = root.find("name")
            if name_elem is not None and name_elem.text:
                metadata["extension_name"] = name_elem.text.strip()
            # Only the first matching manifest is recorded.
            break
        for dir_name in ("site", "admin", "administrator"):
            if (self.repo_path / dir_name).is_dir():
                confidence += 15
                indicators.append(f"Joomla directory structure: {dir_name}/")
        if (self.repo_path / "language" / "en-GB").exists():
            confidence += 10
            indicators.append("Joomla language directory: language/en-GB/")
        media_dir = self.repo_path / "media"
        if media_dir.is_dir() and list(media_dir.glob("**/*.css")):
            confidence += 5
            indicators.append("Joomla media directory with assets")
        return DetectionResult(
            platform_type=PlatformType.JOOMLA,
            confidence=min(confidence, 100),
            indicators=indicators,
            metadata=metadata
        )

    def _detect_dolibarr(self) -> DetectionResult:
        """Detect Dolibarr module with enhanced structure analysis.

        Detection criteria:
        - Module descriptor files (mod*.class.php) matching >= 3 of the
          known DolibarrModules code markers (+60, counted at most once)
        - core/modules, sql, class, lib, langs directories (+8 each)
        - SQL migration files in sql/ (+10)

        Returns:
            DetectionResult for Dolibarr platform with confidence capped at 100.
        """
        # Local import: re is only needed here, and hoisting it out of the
        # per-file loop avoids re-executing the import machinery per file
        # (the original imported inside the loop body).
        import re

        confidence = 0
        indicators: List[str] = []
        metadata: Dict[str, str] = {}
        descriptor_patterns = ("**/mod*.class.php", "**/core/modules/**/*.php")
        skip_dirs = {".git", "vendor", "node_modules"}
        dolibarr_markers = (
            "extends DolibarrModules",
            "class mod",
            "$this->numero",
            "$this->rights_class",
            "DolibarrModules",
            "dol_include_once"
        )
        # FIX: the original `break` only exited the inner loop, so a second
        # glob pattern could match another descriptor and add +60 again.
        descriptor_found = False
        for pattern in descriptor_patterns:
            if descriptor_found:
                break
            for php_file in self.repo_path.glob(pattern):
                if any(skip_dir in php_file.parts for skip_dir in skip_dirs):
                    continue
                try:
                    content = php_file.read_text(encoding="utf-8", errors="ignore")
                except (OSError, UnicodeDecodeError):
                    continue
                if sum(1 for marker in dolibarr_markers if marker in content) < 3:
                    continue
                confidence += 60
                rel_path = php_file.relative_to(self.repo_path)
                indicators.append(f"Dolibarr module descriptor: {rel_path}")
                metadata["descriptor_file"] = str(rel_path)
                if "class mod" in content:
                    match = re.search(r'class\s+(mod\w+)', content)
                    if match:
                        metadata["module_class"] = match.group(1)
                descriptor_found = True
                break
        for dir_name in ("core/modules", "sql", "class", "lib", "langs"):
            if (self.repo_path / dir_name).exists():
                confidence += 8
                indicators.append(f"Dolibarr directory structure: {dir_name}/")
        sql_dir = self.repo_path / "sql"
        if sql_dir.is_dir():
            sql_files = list(sql_dir.glob("*.sql"))
            if sql_files:
                confidence += 10
                indicators.append(f"Dolibarr SQL files: {len(sql_files)} migration scripts")
                metadata["sql_files_count"] = str(len(sql_files))
        return DetectionResult(
            platform_type=PlatformType.DOLIBARR,
            confidence=min(confidence, 100),
            indicators=indicators,
            metadata=metadata
        )

    def _detect_generic(self) -> DetectionResult:
        """Fallback detection for generic repositories with confidence scoring.

        Provides baseline detection when no specific platform is identified.
        Starts at 50 and adds small bonuses for standard repository hygiene
        (+5 per standard file, +3 per standard directory, capped at 100).

        Returns:
            DetectionResult for generic platform with confidence score.
        """
        confidence = 50
        indicators: List[str] = ["No platform-specific markers found"]
        metadata: Dict[str, str] = {
            "checked_platforms": "Joomla, Dolibarr",
            "detection_reason": "Generic repository fallback"
        }
        found_files = [
            file_name
            for file_name in ("README.md", "LICENSE", ".gitignore", "composer.json", "package.json")
            if (self.repo_path / file_name).exists()
        ]
        confidence += 5 * len(found_files)
        if found_files:
            indicators.append(f"Standard repository files: {', '.join(found_files)}")
        found_dirs = [
            dir_name
            for dir_name in ("src", "tests", "docs", ".github")
            if (self.repo_path / dir_name).is_dir()
        ]
        confidence += 3 * len(found_dirs)
        if found_dirs:
            indicators.append(f"Standard directory structure: {', '.join(found_dirs)}")
        return DetectionResult(
            platform_type=PlatformType.GENERIC,
            confidence=min(confidence, 100),
            indicators=indicators,
            metadata=metadata
        )
def main() -> int:
    """Command-line entry point for platform auto-detection.

    Returns:
        Exit code: 0 for success, 1 for detection failure, 2 for config error.
    """
    parser = argparse.ArgumentParser(
        description=f"Auto-detect repository platform v{__version__}",
        epilog="For more information, see docs/scripts/validate/"
    )
    parser.add_argument("--repo-path", type=str, default=".",
                        help="Path to repository to analyze (default: current directory)")
    parser.add_argument("--json", action="store_true",
                        help="Output results in JSON format for automation")
    parser.add_argument("--verbose", action="store_true",
                        help="Enable verbose output with detailed indicators")
    parser.add_argument("--cache", action="store_true",
                        help="Enable caching for performance (stores results in ~/.cache/mokostudios)")
    parser.add_argument("--clear-cache", action="store_true",
                        help="Clear detection cache and exit")
    parser.add_argument("--version", action="version",
                        version=f"%(prog)s {__version__}")
    args = parser.parse_args()

    # --clear-cache is a maintenance action: wipe and exit immediately.
    if args.clear_cache:
        DetectionCache().clear()
        if not args.json:
            print("✓ Detection cache cleared")
        return 0

    try:
        repo_path = Path(args.repo_path).resolve()
        if not repo_path.exists():
            if args.json:
                print(json.dumps({"error": "Repository path does not exist", "path": str(repo_path)}))
            else:
                print(f"✗ Error: Repository path does not exist: {repo_path}", file=sys.stderr)
            return 2

        result = PlatformDetector(repo_path, use_cache=args.cache).detect()

        if args.json:
            payload = result.to_dict()
            payload["repo_path"] = str(repo_path)
            payload["version"] = __version__
            print(json.dumps(payload, indent=2))
        else:
            banner = "=" * 70
            print(banner)
            print(f"Platform Auto-Detection v{__version__}")
            print(banner)
            print()
            print(f"📁 Repository: {repo_path}")
            print(f"🔍 Platform: {result.platform_type.value.upper()}")
            print(f"📊 Confidence: {result.confidence}%")
            print()
            if args.verbose and result.indicators:
                print("Detection Indicators:")
                for indicator in result.indicators:
                    print(indicator)
                print()
            if args.verbose and result.metadata:
                print("Metadata:")
                for key, value in result.metadata.items():
                    print(f"  {key}: {value}")
                print()
            if args.cache:
                print("💾 Result cached for future runs")
                print()
            print(banner)
        return 0
    except ValueError as e:
        # Raised by PlatformDetector for invalid paths: configuration error.
        if args.json:
            print(json.dumps({"error": str(e)}))
        else:
            print(f"✗ Error: {e}", file=sys.stderr)
        return 2
    except Exception as e:
        # Top-level boundary: report anything unexpected as a detection failure.
        if args.json:
            print(json.dumps({"error": f"Unexpected error: {str(e)}"}))
        else:
            print(f"✗ Unexpected error: {e}", file=sys.stderr)
        return 1


if __name__ == "__main__":
    sys.exit(main())

View File

@@ -0,0 +1,258 @@
#!/usr/bin/env python3
"""
Copyright (C) 2026 Moko Consulting <hello@mokoconsulting.tech>
This file is part of a Moko Consulting project.
SPDX-License-Identifier: GPL-3.0-or-later
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
FILE INFORMATION
DEFGROUP: MokoStandards.Scripts.Validate
INGROUP: MokoStandards
REPO: https://github.com/mokoconsulting-tech/MokoStandards
PATH: /scripts/validate/validate_codeql_config.py
VERSION: 01.00.00
BRIEF: Validates CodeQL workflow language configuration matches repository contents
"""
import argparse
import sys
from pathlib import Path
from typing import Dict, List, Set, Tuple
try:
import yaml
except ImportError:
print("Error: PyYAML is required. Install with: pip install pyyaml", file=sys.stderr)
sys.exit(1)
# Language to file extension mapping.
# Keys are CodeQL language identifiers; values are the file extensions
# counted as evidence that the language is present in the repository.
LANGUAGE_EXTENSIONS = {
    'python': {'.py'},
    'javascript': {'.js', '.jsx', '.ts', '.tsx', '.mjs', '.cjs'},
    'php': {'.php'},
    'java': {'.java'},
    'go': {'.go'},
    'ruby': {'.rb'},
    'cpp': {'.cpp', '.cc', '.cxx', '.c', '.h', '.hpp'},
    'csharp': {'.cs'},
}


def detect_languages_in_repo(repo_path: Path, exclude_dirs: "Set[str] | None" = None) -> Dict[str, int]:
    """
    Detect programming languages present in the repository by scanning file extensions.

    Args:
        repo_path: Path to the repository root
        exclude_dirs: Set of directory names to exclude from scanning
            (defaults to common VCS / dependency / virtualenv directories)

    Returns:
        Dictionary mapping language names to file counts; only languages
        with at least one matching file are included
    """
    # FIX: the parameter was annotated Set[str] but defaults to None; the
    # string annotation keeps the optionality explicit without new imports.
    if exclude_dirs is None:
        exclude_dirs = {'.git', 'vendor', 'node_modules', '.venv', 'venv', '__pycache__'}
    # Build a reverse extension->language map once so the repository is
    # walked a single time instead of once per extension (the original
    # called rglob for every extension of every language).
    extension_to_language = {
        ext: language
        for language, extensions in LANGUAGE_EXTENSIONS.items()
        for ext in extensions
    }
    language_counts: Dict[str, int] = {}
    for file_path in repo_path.rglob('*'):
        # Skip anything inside an excluded directory
        if any(excluded in file_path.parts for excluded in exclude_dirs):
            continue
        if not file_path.is_file():
            continue
        language = extension_to_language.get(file_path.suffix)
        if language is not None:
            language_counts[language] = language_counts.get(language, 0) + 1
    return language_counts
def parse_codeql_workflow(workflow_path: Path) -> Tuple[List[str], bool]:
    """
    Parse CodeQL workflow file and extract configured languages.

    Args:
        workflow_path: Path to the CodeQL workflow YAML file

    Returns:
        Tuple of (list of configured languages, whether parsing succeeded)
    """
    try:
        with open(workflow_path, 'r') as f:
            workflow = yaml.safe_load(f)
        # Use the first job that declares a strategy.matrix.language entry.
        jobs = workflow.get('jobs', {})
        for job_config in jobs.values():
            strategy = job_config.get('strategy', {})
            matrix = strategy.get('matrix', {})
            languages = matrix.get('language', [])
            if languages:
                # FIX: a workflow may configure a single language as a plain
                # string (language: python); normalize to a list so callers
                # can always iterate language names rather than characters.
                if isinstance(languages, str):
                    languages = [languages]
                return languages, True
        return [], False
    except Exception as e:
        # Broad catch is intentional: any unreadable or malformed workflow
        # (including an empty file, where safe_load returns None) is reported
        # as a parse failure instead of crashing the validator.
        print(f"Error parsing workflow: {e}", file=sys.stderr)
        return [], False
def validate_codeql_config(repo_path: Path, workflow_path: Path) -> Tuple[bool, List[str], List[str]]:
    """
    Validate that CodeQL workflow languages match repository contents.

    Args:
        repo_path: Path to the repository root
        workflow_path: Path to the CodeQL workflow file

    Returns:
        Tuple of (is_valid, list of errors, list of warnings)
    """
    errors: List[str] = []
    warnings: List[str] = []

    # The workflow file must exist before anything else can be checked.
    if not workflow_path.exists():
        errors.append(f"CodeQL workflow not found at: {workflow_path}")
        return False, errors, warnings

    # Scan the repository for supported languages.
    detected_languages = detect_languages_in_repo(repo_path)
    if not detected_languages:
        warnings.append("No supported programming languages detected in repository")
        return True, errors, warnings

    # Extract the languages configured in the workflow matrix.
    configured_languages, parse_success = parse_codeql_workflow(workflow_path)
    if not parse_success:
        errors.append("Could not find language configuration in CodeQL workflow")
        return False, errors, warnings
    if not configured_languages:
        errors.append("No languages configured in CodeQL workflow matrix")
        return False, errors, warnings

    detected_set = set(detected_languages)
    configured_set = set(configured_languages)

    # Configured-but-absent languages break the CodeQL run outright.
    for lang in configured_set - detected_set:
        errors.append(
            f"Language '{lang}' is configured in CodeQL but no {lang.upper()} files found in repository. "
            f"This will cause CodeQL analysis to fail."
        )

    # Present-but-unconfigured languages are only a missed scanning opportunity.
    for lang in detected_set - configured_set:
        file_count = detected_languages[lang]
        warnings.append(
            f"Language '{lang}' has {file_count} files in repository but is not configured in CodeQL workflow. "
            f"Consider adding it for security scanning."
        )

    return len(errors) == 0, errors, warnings
def main():
    """Main entry point for the validation script."""
    parser = argparse.ArgumentParser(
        description='Validate CodeQL workflow language configuration against repository contents'
    )
    parser.add_argument('--repo-path', type=Path, default=Path('.'),
                        help='Path to repository root (default: current directory)')
    parser.add_argument('--workflow-path', type=Path,
                        help='Path to CodeQL workflow file (default: .github/workflows/codeql-analysis.yml)')
    parser.add_argument('--strict', action='store_true',
                        help='Treat warnings as errors')
    args = parser.parse_args()

    repo_path = args.repo_path.resolve()
    if args.workflow_path is None:
        workflow_path = repo_path / '.github' / 'workflows' / 'codeql-analysis.yml'
    else:
        workflow_path = args.workflow_path.resolve()

    print("Validating CodeQL configuration...")
    print(f"Repository: {repo_path}")
    print(f"Workflow: {workflow_path}")
    print()

    # Informational listing of detected languages before the actual check.
    detected_languages = detect_languages_in_repo(repo_path)
    if detected_languages:
        print("Detected languages in repository:")
        for lang, count in sorted(detected_languages.items()):
            print(f" - {lang}: {count} files")
        print()

    is_valid, errors, warnings = validate_codeql_config(repo_path, workflow_path)

    if errors:
        print("❌ ERRORS:")
        for error in errors:
            print(f" - {error}")
        print()
    if warnings:
        print("⚠️ WARNINGS:")
        for warning in warnings:
            print(f" - {warning}")
        print()

    # Exit code: 0 = valid (warnings tolerated unless --strict), 1 = failed.
    if is_valid and not warnings:
        print("✅ CodeQL configuration is valid and matches repository contents")
        return 0
    elif is_valid:
        print("✅ CodeQL configuration is valid (with warnings)")
        if args.strict:
            print("❌ Strict mode enabled: treating warnings as errors")
            return 1
        return 0
    else:
        print("❌ CodeQL configuration validation failed")
        return 1


if __name__ == '__main__':
    sys.exit(main())

View File

@@ -0,0 +1,407 @@
#!/usr/bin/env python3
"""
Repository Structure Validator (XML/JSON Support)
Validates repository structure against XML or JSON schema definitions.
Checks for required files, directories, validates naming conventions, and enforces
requirement statuses (required, suggested, optional, not-allowed).
Supports both XML and JSON schema formats for maximum flexibility.
Usage:
python3 validate_structure_v2.py [--schema SCHEMA_FILE] [--format xml|json|auto] [--repo-path PATH]
Examples:
# Auto-detect format from file extension
python3 validate_structure_v2.py --schema scripts/definitions/default-repository.xml
python3 validate_structure_v2.py --schema scripts/definitions/default-repository.json
# Explicit format specification
python3 validate_structure_v2.py --schema my-schema.txt --format json --repo-path /path/to/repo
Exit codes:
0: Success (all validations passed)
1: Validation errors found (required items missing or not-allowed items present)
2: Validation warnings (suggested items missing)
3: Configuration error (invalid schema, missing files, etc.)
"""
import sys
import os
import argparse
import xml.etree.ElementTree as ET
import json
from pathlib import Path
from typing import List, Dict, Tuple, Optional, Any
from dataclasses import dataclass
from enum import Enum
class Severity(Enum):
    """Severity level attached to each validation finding."""

    ERROR = "error"      # required item missing or not-allowed item present
    WARNING = "warning"  # suggested item missing
    INFO = "info"        # item present; reported for completeness
class RequirementStatus(Enum):
    """How strongly the schema requires an item to be present.

    Values match the ``requirement-status`` / ``requirementStatus`` strings
    used in the XML and JSON schema files.
    """

    REQUIRED = "required"        # missing item is an error
    SUGGESTED = "suggested"      # missing item is a warning
    OPTIONAL = "optional"        # presence is informational only
    NOT_ALLOWED = "not-allowed"  # presence is an error
@dataclass
class ValidationResult:
    """Result of a validation check.

    One instance is produced per schema item inspected by
    RepositoryStructureValidator (files and directories alike).
    """
    severity: Severity  # error / warning / info classification of the finding
    message: str  # human-readable description of the finding
    path: str  # repository-relative path of the checked item
    requirement_status: Optional[RequirementStatus] = None  # schema status that produced the finding, if any
    rule_type: Optional[str] = None  # NOTE(review): never populated by the validators in this file — confirm before relying on it
class RepositoryStructureValidator:
    """Validates repository structure against XML or JSON definition.

    The schema is parsed once in ``__init__`` and normalized into a plain
    dictionary (``self.structure_data``), so XML and JSON schemas share the
    same validation code path afterwards. Findings are accumulated in
    ``self.results`` by :meth:`validate` and reported by
    :meth:`print_results`.
    """
    def __init__(self, schema_path: str, repo_path: str = ".", schema_format: str = "auto"):
        """
        Initialize validator and load the schema.

        Args:
            schema_path: Path to schema definition (XML or JSON)
            repo_path: Path to repository to validate (default: current directory)
            schema_format: Format of schema file ('xml', 'json', or 'auto' for auto-detection)
        """
        self.schema_path = schema_path
        self.repo_path = Path(repo_path).resolve()
        self.results: List[ValidationResult] = []  # findings accumulated by validate()
        self.schema_format = schema_format
        self.structure_data = None  # normalized schema dict, set by the loaders below
        # Determine format
        if self.schema_format == "auto":
            self.schema_format = self._detect_format()
        # Load schema
        try:
            if self.schema_format == "xml":
                self._load_xml_schema()
            elif self.schema_format == "json":
                self._load_json_schema()
            else:
                raise ValueError(f"Unsupported schema format: {self.schema_format}")
        except Exception as e:
            # Any schema problem terminates the whole process with exit
            # code 3 (configuration error, per the module docstring).
            print(f"Error loading schema: {e}", file=sys.stderr)
            sys.exit(3)
    def _detect_format(self) -> str:
        """Auto-detect schema format from file extension, then from content.

        Raises:
            ValueError: If neither the extension nor the leading content
                identifies the format.
        """
        ext = Path(self.schema_path).suffix.lower()
        if ext == ".json":
            return "json"
        elif ext in [".xml", ""]:
            # Extensionless files are assumed to be XML.
            return "xml"
        else:
            # Try to detect from content
            try:
                with open(self.schema_path, 'r') as f:
                    content = f.read().strip()
                    if content.startswith('{') or content.startswith('['):
                        return "json"
                    elif content.startswith('<?xml') or content.startswith('<'):
                        return "xml"
            except Exception:
                pass
            # Unable to detect format
            raise ValueError(f"Unable to detect schema format for {self.schema_path}")
    def _load_xml_schema(self):
        """Load XML schema and normalize it into self.structure_data."""
        self.tree = ET.parse(self.schema_path)
        self.root = self.tree.getroot()
        # All schema elements are expected under this fixed namespace.
        self.namespace = {'rs': 'http://mokoconsulting.com/schemas/repository-structure'}
        self.structure_data = self._parse_xml_to_dict()
    def _load_json_schema(self):
        """Load JSON schema directly into self.structure_data."""
        with open(self.schema_path, 'r') as f:
            self.structure_data = json.load(f)
    def _parse_xml_to_dict(self) -> Dict[str, Any]:
        """Convert XML structure to dictionary format for unified processing.

        The resulting shape mirrors the JSON schema layout:
        {'metadata': {...}, 'structure': {'rootFiles': [...], 'directories': [...]}}.
        """
        structure = {}
        # Parse metadata
        metadata_elem = self.root.find('rs:metadata', self.namespace)
        if metadata_elem is not None:
            structure['metadata'] = {
                'name': self._get_element_text(metadata_elem, 'name'),
                'description': self._get_element_text(metadata_elem, 'description'),
                'repositoryType': self._get_element_text(metadata_elem, 'repository-type'),
                'platform': self._get_element_text(metadata_elem, 'platform'),
            }
        # Parse structure
        structure_elem = self.root.find('rs:structure', self.namespace)
        if structure_elem is not None:
            structure['structure'] = {}
            # Parse root files
            root_files_elem = structure_elem.find('rs:root-files', self.namespace)
            if root_files_elem is not None:
                structure['structure']['rootFiles'] = []
                for file_elem in root_files_elem.findall('rs:file', self.namespace):
                    structure['structure']['rootFiles'].append(self._parse_xml_file(file_elem))
            # Parse directories
            directories_elem = structure_elem.find('rs:directories', self.namespace)
            if directories_elem is not None:
                structure['structure']['directories'] = []
                for dir_elem in directories_elem.findall('rs:directory', self.namespace):
                    structure['structure']['directories'].append(self._parse_xml_directory(dir_elem))
        return structure
    def _parse_xml_file(self, file_elem) -> Dict[str, Any]:
        """Parse XML file element to dictionary (None-valued keys are dropped)."""
        file_data = {
            'name': self._get_element_text(file_elem, 'name'),
            'description': self._get_element_text(file_elem, 'description'),
            'requirementStatus': self._get_element_text(file_elem, 'requirement-status', 'required'),
            'audience': self._get_element_text(file_elem, 'audience'),
            'template': self._get_element_text(file_elem, 'template'),
        }
        # Handle extension attribute
        if 'extension' in file_elem.attrib:
            file_data['extension'] = file_elem.attrib['extension']
        return {k: v for k, v in file_data.items() if v is not None}
    def _parse_xml_directory(self, dir_elem) -> Dict[str, Any]:
        """Parse XML directory element to dictionary, recursing into children."""
        dir_data = {
            'name': self._get_element_text(dir_elem, 'name'),
            'path': dir_elem.attrib.get('path'),
            'description': self._get_element_text(dir_elem, 'description'),
            'requirementStatus': self._get_element_text(dir_elem, 'requirement-status', 'required'),
            'purpose': self._get_element_text(dir_elem, 'purpose'),
        }
        # Parse files within directory
        files_elem = dir_elem.find('rs:files', self.namespace)
        if files_elem is not None:
            dir_data['files'] = []
            for file_elem in files_elem.findall('rs:file', self.namespace):
                dir_data['files'].append(self._parse_xml_file(file_elem))
        # Parse subdirectories
        subdirs_elem = dir_elem.find('rs:subdirectories', self.namespace)
        if subdirs_elem is not None:
            dir_data['subdirectories'] = []
            for subdir_elem in subdirs_elem.findall('rs:directory', self.namespace):
                dir_data['subdirectories'].append(self._parse_xml_directory(subdir_elem))
        return {k: v for k, v in dir_data.items() if v is not None}
    def _get_element_text(self, parent, tag_name, default=None):
        """Get text content of a namespaced XML child element.

        Only meaningful for XML schemas; returns *default* unconditionally
        when the schema was loaded from JSON.
        """
        if self.schema_format == "xml":
            elem = parent.find(f'rs:{tag_name}', self.namespace)
            return elem.text if elem is not None else default
        return default
    def validate(self) -> List[ValidationResult]:
        """
        Run all validation checks against the repository on disk.

        Resets ``self.results`` and walks the normalized schema: root files
        first, then each top-level directory (recursively).

        Returns:
            List of validation results
        """
        self.results = []
        print(f"Validating repository: {self.repo_path}")
        print(f"Against schema: {self.schema_path} (format: {self.schema_format})")
        print("-" * 80)
        # Validate root files
        if 'structure' in self.structure_data and 'rootFiles' in self.structure_data['structure']:
            for file_def in self.structure_data['structure']['rootFiles']:
                self._validate_file(file_def, self.repo_path)
        # Validate directories
        if 'structure' in self.structure_data and 'directories' in self.structure_data['structure']:
            for dir_def in self.structure_data['structure']['directories']:
                self._validate_directory(dir_def, self.repo_path)
        return self.results
    def _validate_file(self, file_def: Dict[str, Any], parent_path: Path):
        """Validate a file requirement and append a result to self.results.

        A missing OPTIONAL file produces no finding at all; any existing
        file is recorded as INFO unless its status is not-allowed.
        NOTE(review): an unknown requirementStatus string raises ValueError
        here rather than being reported as a schema error — confirm intended.
        """
        file_name = file_def.get('name')
        requirement_status = RequirementStatus(file_def.get('requirementStatus', 'required'))
        file_path = parent_path / file_name
        exists = file_path.exists() and file_path.is_file()
        if requirement_status == RequirementStatus.REQUIRED and not exists:
            self.results.append(ValidationResult(
                severity=Severity.ERROR,
                message=f"Required file missing: {file_name}",
                path=str(file_path.relative_to(self.repo_path)),
                requirement_status=requirement_status
            ))
        elif requirement_status == RequirementStatus.SUGGESTED and not exists:
            self.results.append(ValidationResult(
                severity=Severity.WARNING,
                message=f"Suggested file missing: {file_name}",
                path=str(file_path.relative_to(self.repo_path)),
                requirement_status=requirement_status
            ))
        elif requirement_status == RequirementStatus.NOT_ALLOWED and exists:
            self.results.append(ValidationResult(
                severity=Severity.ERROR,
                message=f"Not-allowed file present: {file_name} (should not be committed)",
                path=str(file_path.relative_to(self.repo_path)),
                requirement_status=requirement_status
            ))
        elif exists:
            self.results.append(ValidationResult(
                severity=Severity.INFO,
                message=f"File present: {file_name}",
                path=str(file_path.relative_to(self.repo_path)),
                requirement_status=requirement_status
            ))
    def _validate_directory(self, dir_def: Dict[str, Any], parent_path: Path):
        """Validate a directory requirement, then recurse into its contents.

        The directory is located via its schema 'path' (falling back to
        'name') relative to the repository root — note this intentionally
        uses self.repo_path, not *parent_path*, for the directory itself;
        nested files/subdirectories are resolved relative to dir_path.
        """
        dir_name = dir_def.get('name')
        dir_path_str = dir_def.get('path', dir_name)
        requirement_status = RequirementStatus(dir_def.get('requirementStatus', 'required'))
        dir_path = self.repo_path / dir_path_str
        exists = dir_path.exists() and dir_path.is_dir()
        if requirement_status == RequirementStatus.REQUIRED and not exists:
            self.results.append(ValidationResult(
                severity=Severity.ERROR,
                message=f"Required directory missing: {dir_name}",
                path=dir_path_str,
                requirement_status=requirement_status
            ))
            return  # Skip validating contents if directory doesn't exist
        elif requirement_status == RequirementStatus.SUGGESTED and not exists:
            self.results.append(ValidationResult(
                severity=Severity.WARNING,
                message=f"Suggested directory missing: {dir_name}",
                path=dir_path_str,
                requirement_status=requirement_status
            ))
            return
        elif requirement_status == RequirementStatus.NOT_ALLOWED and exists:
            self.results.append(ValidationResult(
                severity=Severity.ERROR,
                message=f"Not-allowed directory present: {dir_name} (should not be committed)",
                path=dir_path_str,
                requirement_status=requirement_status
            ))
            return
        elif exists:
            self.results.append(ValidationResult(
                severity=Severity.INFO,
                message=f"Directory present: {dir_name}",
                path=dir_path_str,
                requirement_status=requirement_status
            ))
        # Validate files within directory
        if exists and 'files' in dir_def:
            for file_def in dir_def['files']:
                self._validate_file(file_def, dir_path)
        # Validate subdirectories
        if exists and 'subdirectories' in dir_def:
            for subdir_def in dir_def['subdirectories']:
                self._validate_directory(subdir_def, dir_path)
    def print_results(self):
        """Print validation results grouped by severity.

        Returns:
            Tuple of (error count, warning count) for exit-code decisions.
        """
        errors = [r for r in self.results if r.severity == Severity.ERROR]
        warnings = [r for r in self.results if r.severity == Severity.WARNING]
        infos = [r for r in self.results if r.severity == Severity.INFO]
        print("\n" + "=" * 80)
        print("VALIDATION RESULTS")
        print("=" * 80)
        if errors:
            print(f"\n❌ ERRORS ({len(errors)}):")
            for result in errors:
                print(f" {result.path}: {result.message}")
        if warnings:
            print(f"\n⚠️ WARNINGS ({len(warnings)}):")
            for result in warnings:
                print(f" {result.path}: {result.message}")
        if infos:
            print(f"\n✓ INFO ({len(infos)} items validated successfully)")
        print("\n" + "=" * 80)
        print(f"Summary: {len(errors)} errors, {len(warnings)} warnings, {len(infos)} info")
        print("=" * 80)
        return len(errors), len(warnings)
def main():
    """CLI entry point: parse arguments, validate, and exit with a status code."""
    parser = argparse.ArgumentParser(
        description='Validate repository structure against XML or JSON schema',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=__doc__
    )
    parser.add_argument('--schema', default='scripts/definitions/default-repository.xml',
                        help='Path to schema file (XML or JSON). Default: scripts/definitions/default-repository.xml')
    parser.add_argument('--format', choices=['xml', 'json', 'auto'], default='auto',
                        help='Schema format (xml, json, or auto-detect). Default: auto')
    parser.add_argument('--repo-path', default='.',
                        help='Path to repository to validate. Default: current directory')
    args = parser.parse_args()

    validator = RepositoryStructureValidator(
        schema_path=args.schema,
        repo_path=args.repo_path,
        schema_format=args.format
    )
    validator.validate()
    errors, warnings = validator.print_results()

    # Exit code contract: 1 = errors found, 2 = warnings only, 0 = clean.
    if errors > 0:
        sys.exit(1)
    elif warnings > 0:
        sys.exit(2)
    else:
        sys.exit(0)


if __name__ == '__main__':
    main()