chore: sync workflows, scripts, and configurations from MokoStandards

This commit is contained in:
Moko Standards Bot
2026-01-30 02:15:07 +00:00
parent 2436054ae6
commit 0f2c0c1166
16 changed files with 5064 additions and 28 deletions

View File

@@ -0,0 +1,533 @@
#!/usr/bin/env python3
"""Auto-Detect Repository Platform v03.00.00 - Critical Validator Infrastructure.
This script automatically detects repository platform types with confidence scoring
and provides JSON/CLI output for automation workflows.
Platform detection capabilities:
- Joomla/WaaS components (manifest patterns, version detection)
- Dolibarr/CRM modules (module.php, core/ structure)
- Generic repositories (fallback with confidence scoring)
Usage:
python3 auto_detect_platform.py [--repo-path PATH] [--json] [--verbose] [--cache]
Examples:
# Auto-detect current repository with JSON output
python3 auto_detect_platform.py --json
# Detect specific repository with caching
python3 auto_detect_platform.py --repo-path /path/to/repo --cache --verbose
# JSON output for CI/CD automation
python3 auto_detect_platform.py --json | jq '.platform_type'
Exit codes:
0: Success (platform detected successfully)
1: Detection failed (no platform could be determined)
2: Configuration error (invalid arguments or paths)
"""
import argparse
import hashlib
import json
import os
import pickle
import sys
import xml.etree.ElementTree as ET
from dataclasses import dataclass, asdict
from enum import Enum
from pathlib import Path
from typing import Dict, List, Optional, Tuple
# Version
__version__ = "03.00.00"
class PlatformType(Enum):
    """Supported repository platform classifications.

    The string values are stable identifiers emitted in JSON output
    (``DetectionResult.to_dict``), so they must not be renamed.
    """

    JOOMLA = "joomla"      # Joomla/WaaS extension repositories
    DOLIBARR = "dolibarr"  # Dolibarr/CRM module repositories
    GENERIC = "generic"    # fallback when no platform markers are found
@dataclass
class DetectionResult:
    """Platform detection result with confidence scoring.

    Attributes:
        platform_type: Detected platform type enum value.
        confidence: Confidence score from 0-100.
        indicators: Human-readable list of detection indicators found.
        metadata: Additional platform-specific metadata (string keys/values).
    """

    platform_type: PlatformType
    confidence: int
    indicators: List[str]
    metadata: Dict[str, str]

    def to_dict(self) -> Dict[str, object]:
        """Convert detection result to a JSON-serializable dictionary.

        Returns:
            Dictionary representation with platform_type flattened to its
            string value so ``json.dumps`` can serialize it directly.
        """
        # FIX: the original annotation was Dict[str, any], which refers to the
        # builtin any() function rather than typing.Any. The values here are of
        # mixed types (str, int, list, dict), so Dict[str, object] is accurate
        # and needs no extra import.
        return {
            "platform_type": self.platform_type.value,
            "confidence": self.confidence,
            "indicators": self.indicators,
            "metadata": self.metadata
        }
class DetectionCache:
    """File-based cache of detection results.

    Each result is pickled to a file named after the SHA256 hash of the
    repository path, so repeated runs against the same path can skip the
    filesystem scan entirely.
    """

    def __init__(self, cache_dir: Optional[Path] = None) -> None:
        """Create the cache, ensuring the backing directory exists.

        Args:
            cache_dir: Directory for cache files. Defaults to
                ~/.cache/mokostudios/platform_detection.
        """
        self.cache_dir = (
            Path.home() / ".cache" / "mokostudios" / "platform_detection"
            if cache_dir is None
            else cache_dir
        )
        self.cache_dir.mkdir(parents=True, exist_ok=True)

    def _get_cache_key(self, repo_path: Path) -> str:
        """Return the SHA256 hex digest of the repository path string."""
        return hashlib.sha256(str(repo_path).encode()).hexdigest()

    def get(self, repo_path: Path) -> Optional[DetectionResult]:
        """Return the cached result for *repo_path*, or None on a miss.

        Corrupt or unreadable cache entries are treated as misses rather
        than raised to the caller.
        """
        entry = self.cache_dir / f"{self._get_cache_key(repo_path)}.pkl"
        if not entry.exists():
            return None
        try:
            with open(entry, 'rb') as handle:
                return pickle.load(handle)
        except (pickle.PickleError, OSError, EOFError):
            # Unreadable entry: behave as if nothing was cached.
            return None

    def set(self, repo_path: Path, result: DetectionResult) -> None:
        """Persist *result* for *repo_path*.

        Caching is best-effort: write failures are ignored so detection
        still succeeds without a working cache.
        """
        entry = self.cache_dir / f"{self._get_cache_key(repo_path)}.pkl"
        try:
            with open(entry, 'wb') as handle:
                pickle.dump(result, handle)
        except (pickle.PickleError, OSError):
            pass

    def clear(self) -> None:
        """Delete every cached entry, ignoring files that cannot be removed."""
        for entry in self.cache_dir.glob("*.pkl"):
            try:
                entry.unlink()
            except OSError:
                pass
class PlatformDetector:
    """Detects repository platform type with enhanced detection algorithms.

    Runs the Joomla and Dolibarr detectors in turn and falls back to a
    generic result; a platform is accepted once its confidence reaches 50.
    """

    def __init__(self, repo_path: Path, use_cache: bool = False) -> None:
        """Initialize platform detector.

        Args:
            repo_path: Path to repository to analyze.
            use_cache: Enable caching for performance optimization.

        Raises:
            ValueError: If the resolved repository path does not exist.
        """
        self.repo_path = Path(repo_path).resolve()
        self.use_cache = use_cache
        self.cache = DetectionCache() if use_cache else None
        if not self.repo_path.exists():
            raise ValueError(f"Repository path does not exist: {self.repo_path}")

    def detect(self) -> DetectionResult:
        """Detect repository platform type.

        Executes platform-specific detection methods in order:
        1. Joomla detection (manifest patterns, directory structure)
        2. Dolibarr detection (module descriptors, core/ structure)
        3. Generic fallback (confidence-based scoring)

        Returns:
            DetectionResult with platform type and confidence score.
        """
        if self.use_cache and self.cache:
            cached_result = self.cache.get(self.repo_path)
            if cached_result:
                return cached_result
        # Detectors are called lazily so Dolibarr scanning is skipped
        # entirely when the Joomla detector already matched.
        for detector in (self._detect_joomla, self._detect_dolibarr):
            result = detector()
            if result.confidence >= 50:
                return self._store(result)
        return self._store(self._detect_generic())

    def _store(self, result: DetectionResult) -> DetectionResult:
        """Cache *result* when caching is enabled, then return it unchanged."""
        if self.use_cache and self.cache:
            self.cache.set(self.repo_path, result)
        return result

    def _detect_joomla(self) -> DetectionResult:
        """Detect Joomla component with enhanced manifest pattern matching.

        Detection criteria:
        - XML manifest files with <extension> or <install> root tags (+50)
        - Extension type attribute (component, module, plugin, etc.)
        - Joomla version tag in manifest (+10)
        - Directory structure site/, admin/, administrator/ (+15 each)
        - Language directory language/en-GB/ (+10), media assets (+5)

        Returns:
            DetectionResult for Joomla platform with confidence capped at 100.
        """
        confidence = 0
        indicators: List[str] = []
        metadata: Dict[str, str] = {}
        skip_dirs = {".git", "vendor", "node_modules", ".github"}
        for xml_file in self.repo_path.glob("**/*.xml"):
            if any(skip_dir in xml_file.parts for skip_dir in skip_dirs):
                continue
            try:
                root = ET.parse(xml_file).getroot()
            except (ET.ParseError, OSError):
                continue
            if root.tag not in ("extension", "install"):
                continue
            ext_type = root.get("type", "")
            if ext_type not in ("component", "module", "plugin", "library", "template", "file"):
                continue
            confidence += 50
            rel_path = xml_file.relative_to(self.repo_path)
            indicators.append(f"Joomla manifest: {rel_path} (type={ext_type})")
            metadata["manifest_file"] = str(rel_path)
            metadata["extension_type"] = ext_type
            version_elem = root.find("version")
            if version_elem is not None and version_elem.text:
                confidence += 10
                metadata["version"] = version_elem.text.strip()
                indicators.append(f"Joomla version tag: {version_elem.text.strip()}")
            name_elem = root.find("name")
            if name_elem is not None and name_elem.text:
                metadata["extension_name"] = name_elem.text.strip()
            # Only the first matching manifest is recorded.
            break
        for dir_name in ("site", "admin", "administrator"):
            if (self.repo_path / dir_name).is_dir():
                confidence += 15
                indicators.append(f"Joomla directory structure: {dir_name}/")
        if (self.repo_path / "language" / "en-GB").exists():
            confidence += 10
            indicators.append("Joomla language directory: language/en-GB/")
        media_dir = self.repo_path / "media"
        if media_dir.is_dir() and list(media_dir.glob("**/*.css")):
            confidence += 5
            indicators.append("Joomla media directory with assets")
        return DetectionResult(
            platform_type=PlatformType.JOOMLA,
            confidence=min(confidence, 100),
            indicators=indicators,
            metadata=metadata
        )

    def _detect_dolibarr(self) -> DetectionResult:
        """Detect Dolibarr module with enhanced structure analysis.

        Detection criteria:
        - Module descriptor files (mod*.class.php) matching >= 3 of the
          known DolibarrModules code markers (+60, counted at most once)
        - core/modules, sql, class, lib, langs directories (+8 each)
        - SQL migration files in sql/ (+10)

        Returns:
            DetectionResult for Dolibarr platform with confidence capped at 100.
        """
        # Local import: re is only needed here, and hoisting it out of the
        # per-file loop avoids re-executing the import machinery per file
        # (the original imported inside the loop body).
        import re

        confidence = 0
        indicators: List[str] = []
        metadata: Dict[str, str] = {}
        descriptor_patterns = ("**/mod*.class.php", "**/core/modules/**/*.php")
        skip_dirs = {".git", "vendor", "node_modules"}
        dolibarr_markers = (
            "extends DolibarrModules",
            "class mod",
            "$this->numero",
            "$this->rights_class",
            "DolibarrModules",
            "dol_include_once"
        )
        # FIX: the original `break` only exited the inner loop, so a second
        # glob pattern could match another descriptor and add +60 again.
        descriptor_found = False
        for pattern in descriptor_patterns:
            if descriptor_found:
                break
            for php_file in self.repo_path.glob(pattern):
                if any(skip_dir in php_file.parts for skip_dir in skip_dirs):
                    continue
                try:
                    content = php_file.read_text(encoding="utf-8", errors="ignore")
                except (OSError, UnicodeDecodeError):
                    continue
                if sum(1 for marker in dolibarr_markers if marker in content) < 3:
                    continue
                confidence += 60
                rel_path = php_file.relative_to(self.repo_path)
                indicators.append(f"Dolibarr module descriptor: {rel_path}")
                metadata["descriptor_file"] = str(rel_path)
                if "class mod" in content:
                    match = re.search(r'class\s+(mod\w+)', content)
                    if match:
                        metadata["module_class"] = match.group(1)
                descriptor_found = True
                break
        for dir_name in ("core/modules", "sql", "class", "lib", "langs"):
            if (self.repo_path / dir_name).exists():
                confidence += 8
                indicators.append(f"Dolibarr directory structure: {dir_name}/")
        sql_dir = self.repo_path / "sql"
        if sql_dir.is_dir():
            sql_files = list(sql_dir.glob("*.sql"))
            if sql_files:
                confidence += 10
                indicators.append(f"Dolibarr SQL files: {len(sql_files)} migration scripts")
                metadata["sql_files_count"] = str(len(sql_files))
        return DetectionResult(
            platform_type=PlatformType.DOLIBARR,
            confidence=min(confidence, 100),
            indicators=indicators,
            metadata=metadata
        )

    def _detect_generic(self) -> DetectionResult:
        """Fallback detection for generic repositories with confidence scoring.

        Provides baseline detection when no specific platform is identified.
        Starts at 50 and adds small bonuses for standard repository hygiene
        (+5 per standard file, +3 per standard directory, capped at 100).

        Returns:
            DetectionResult for generic platform with confidence score.
        """
        confidence = 50
        indicators: List[str] = ["No platform-specific markers found"]
        metadata: Dict[str, str] = {
            "checked_platforms": "Joomla, Dolibarr",
            "detection_reason": "Generic repository fallback"
        }
        found_files = [
            file_name
            for file_name in ("README.md", "LICENSE", ".gitignore", "composer.json", "package.json")
            if (self.repo_path / file_name).exists()
        ]
        confidence += 5 * len(found_files)
        if found_files:
            indicators.append(f"Standard repository files: {', '.join(found_files)}")
        found_dirs = [
            dir_name
            for dir_name in ("src", "tests", "docs", ".github")
            if (self.repo_path / dir_name).is_dir()
        ]
        confidence += 3 * len(found_dirs)
        if found_dirs:
            indicators.append(f"Standard directory structure: {', '.join(found_dirs)}")
        return DetectionResult(
            platform_type=PlatformType.GENERIC,
            confidence=min(confidence, 100),
            indicators=indicators,
            metadata=metadata
        )
def main() -> int:
    """Command-line entry point for platform auto-detection.

    Returns:
        Exit code: 0 for success, 1 for detection failure, 2 for config error.
    """
    parser = argparse.ArgumentParser(
        description=f"Auto-detect repository platform v{__version__}",
        epilog="For more information, see docs/scripts/validate/"
    )
    parser.add_argument("--repo-path", type=str, default=".",
                        help="Path to repository to analyze (default: current directory)")
    parser.add_argument("--json", action="store_true",
                        help="Output results in JSON format for automation")
    parser.add_argument("--verbose", action="store_true",
                        help="Enable verbose output with detailed indicators")
    parser.add_argument("--cache", action="store_true",
                        help="Enable caching for performance (stores results in ~/.cache/mokostudios)")
    parser.add_argument("--clear-cache", action="store_true",
                        help="Clear detection cache and exit")
    parser.add_argument("--version", action="version",
                        version=f"%(prog)s {__version__}")
    args = parser.parse_args()

    # --clear-cache is a maintenance action: wipe and exit immediately.
    if args.clear_cache:
        DetectionCache().clear()
        if not args.json:
            print("✓ Detection cache cleared")
        return 0

    try:
        repo_path = Path(args.repo_path).resolve()
        if not repo_path.exists():
            if args.json:
                print(json.dumps({"error": "Repository path does not exist", "path": str(repo_path)}))
            else:
                print(f"✗ Error: Repository path does not exist: {repo_path}", file=sys.stderr)
            return 2

        result = PlatformDetector(repo_path, use_cache=args.cache).detect()

        if args.json:
            payload = result.to_dict()
            payload["repo_path"] = str(repo_path)
            payload["version"] = __version__
            print(json.dumps(payload, indent=2))
        else:
            banner = "=" * 70
            print(banner)
            print(f"Platform Auto-Detection v{__version__}")
            print(banner)
            print()
            print(f"📁 Repository: {repo_path}")
            print(f"🔍 Platform: {result.platform_type.value.upper()}")
            print(f"📊 Confidence: {result.confidence}%")
            print()
            if args.verbose and result.indicators:
                print("Detection Indicators:")
                for indicator in result.indicators:
                    print(indicator)
                print()
            if args.verbose and result.metadata:
                print("Metadata:")
                for key, value in result.metadata.items():
                    print(f"  {key}: {value}")
                print()
            if args.cache:
                print("💾 Result cached for future runs")
                print()
            print(banner)
        return 0
    except ValueError as e:
        # Raised by PlatformDetector for invalid paths: configuration error.
        if args.json:
            print(json.dumps({"error": str(e)}))
        else:
            print(f"✗ Error: {e}", file=sys.stderr)
        return 2
    except Exception as e:
        # Top-level boundary: report anything unexpected as a detection failure.
        if args.json:
            print(json.dumps({"error": f"Unexpected error: {str(e)}"}))
        else:
            print(f"✗ Unexpected error: {e}", file=sys.stderr)
        return 1


if __name__ == "__main__":
    sys.exit(main())

View File

@@ -0,0 +1,258 @@
#!/usr/bin/env python3
"""
Copyright (C) 2026 Moko Consulting <hello@mokoconsulting.tech>
This file is part of a Moko Consulting project.
SPDX-License-Identifier: GPL-3.0-or-later
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
FILE INFORMATION
DEFGROUP: MokoStandards.Scripts.Validate
INGROUP: MokoStandards
REPO: https://github.com/mokoconsulting-tech/MokoStandards
PATH: /scripts/validate/validate_codeql_config.py
VERSION: 01.00.00
BRIEF: Validates CodeQL workflow language configuration matches repository contents
"""
import argparse
import sys
from pathlib import Path
from typing import Dict, List, Set, Tuple
try:
import yaml
except ImportError:
print("Error: PyYAML is required. Install with: pip install pyyaml", file=sys.stderr)
sys.exit(1)
# Language to file extension mapping.
# Keys are CodeQL language identifiers; values are the file extensions
# counted as evidence that the language is present in the repository.
LANGUAGE_EXTENSIONS = {
    'python': {'.py'},
    'javascript': {'.js', '.jsx', '.ts', '.tsx', '.mjs', '.cjs'},
    'php': {'.php'},
    'java': {'.java'},
    'go': {'.go'},
    'ruby': {'.rb'},
    'cpp': {'.cpp', '.cc', '.cxx', '.c', '.h', '.hpp'},
    'csharp': {'.cs'},
}


def detect_languages_in_repo(repo_path: Path, exclude_dirs: "Set[str] | None" = None) -> Dict[str, int]:
    """
    Detect programming languages present in the repository by scanning file extensions.

    Args:
        repo_path: Path to the repository root
        exclude_dirs: Set of directory names to exclude from scanning
            (defaults to common VCS / dependency / virtualenv directories)

    Returns:
        Dictionary mapping language names to file counts; only languages
        with at least one matching file are included
    """
    # FIX: the parameter was annotated Set[str] but defaults to None; the
    # string annotation keeps the optionality explicit without new imports.
    if exclude_dirs is None:
        exclude_dirs = {'.git', 'vendor', 'node_modules', '.venv', 'venv', '__pycache__'}
    # Build a reverse extension->language map once so the repository is
    # walked a single time instead of once per extension (the original
    # called rglob for every extension of every language).
    extension_to_language = {
        ext: language
        for language, extensions in LANGUAGE_EXTENSIONS.items()
        for ext in extensions
    }
    language_counts: Dict[str, int] = {}
    for file_path in repo_path.rglob('*'):
        # Skip anything inside an excluded directory
        if any(excluded in file_path.parts for excluded in exclude_dirs):
            continue
        if not file_path.is_file():
            continue
        language = extension_to_language.get(file_path.suffix)
        if language is not None:
            language_counts[language] = language_counts.get(language, 0) + 1
    return language_counts
def parse_codeql_workflow(workflow_path: Path) -> Tuple[List[str], bool]:
    """
    Parse CodeQL workflow file and extract configured languages.

    Args:
        workflow_path: Path to the CodeQL workflow YAML file

    Returns:
        Tuple of (list of configured languages, whether parsing succeeded)
    """
    try:
        with open(workflow_path, 'r') as f:
            workflow = yaml.safe_load(f)
        # Use the first job that declares a strategy.matrix.language entry.
        jobs = workflow.get('jobs', {})
        for job_config in jobs.values():
            strategy = job_config.get('strategy', {})
            matrix = strategy.get('matrix', {})
            languages = matrix.get('language', [])
            if languages:
                # FIX: a workflow may configure a single language as a plain
                # string (language: python); normalize to a list so callers
                # can always iterate language names rather than characters.
                if isinstance(languages, str):
                    languages = [languages]
                return languages, True
        return [], False
    except Exception as e:
        # Broad catch is intentional: any unreadable or malformed workflow
        # (including an empty file, where safe_load returns None) is reported
        # as a parse failure instead of crashing the validator.
        print(f"Error parsing workflow: {e}", file=sys.stderr)
        return [], False
def validate_codeql_config(repo_path: Path, workflow_path: Path) -> Tuple[bool, List[str], List[str]]:
    """
    Validate that CodeQL workflow languages match repository contents.

    Args:
        repo_path: Path to the repository root
        workflow_path: Path to the CodeQL workflow file

    Returns:
        Tuple of (is_valid, list of errors, list of warnings)
    """
    errors: List[str] = []
    warnings: List[str] = []

    # The workflow file must exist before anything else can be checked.
    if not workflow_path.exists():
        errors.append(f"CodeQL workflow not found at: {workflow_path}")
        return False, errors, warnings

    # Scan the repository for supported languages.
    detected_languages = detect_languages_in_repo(repo_path)
    if not detected_languages:
        warnings.append("No supported programming languages detected in repository")
        return True, errors, warnings

    # Extract the languages configured in the workflow matrix.
    configured_languages, parse_success = parse_codeql_workflow(workflow_path)
    if not parse_success:
        errors.append("Could not find language configuration in CodeQL workflow")
        return False, errors, warnings
    if not configured_languages:
        errors.append("No languages configured in CodeQL workflow matrix")
        return False, errors, warnings

    detected_set = set(detected_languages)
    configured_set = set(configured_languages)

    # Configured-but-absent languages break the CodeQL run outright.
    for lang in configured_set - detected_set:
        errors.append(
            f"Language '{lang}' is configured in CodeQL but no {lang.upper()} files found in repository. "
            f"This will cause CodeQL analysis to fail."
        )

    # Present-but-unconfigured languages are only a missed scanning opportunity.
    for lang in detected_set - configured_set:
        file_count = detected_languages[lang]
        warnings.append(
            f"Language '{lang}' has {file_count} files in repository but is not configured in CodeQL workflow. "
            f"Consider adding it for security scanning."
        )

    return len(errors) == 0, errors, warnings
def main():
    """Main entry point for the validation script."""
    parser = argparse.ArgumentParser(
        description='Validate CodeQL workflow language configuration against repository contents'
    )
    parser.add_argument('--repo-path', type=Path, default=Path('.'),
                        help='Path to repository root (default: current directory)')
    parser.add_argument('--workflow-path', type=Path,
                        help='Path to CodeQL workflow file (default: .github/workflows/codeql-analysis.yml)')
    parser.add_argument('--strict', action='store_true',
                        help='Treat warnings as errors')
    args = parser.parse_args()

    repo_path = args.repo_path.resolve()
    if args.workflow_path is None:
        workflow_path = repo_path / '.github' / 'workflows' / 'codeql-analysis.yml'
    else:
        workflow_path = args.workflow_path.resolve()

    print("Validating CodeQL configuration...")
    print(f"Repository: {repo_path}")
    print(f"Workflow: {workflow_path}")
    print()

    # Informational listing of detected languages before the actual check.
    detected_languages = detect_languages_in_repo(repo_path)
    if detected_languages:
        print("Detected languages in repository:")
        for lang, count in sorted(detected_languages.items()):
            print(f" - {lang}: {count} files")
        print()

    is_valid, errors, warnings = validate_codeql_config(repo_path, workflow_path)

    if errors:
        print("❌ ERRORS:")
        for error in errors:
            print(f" - {error}")
        print()
    if warnings:
        print("⚠️ WARNINGS:")
        for warning in warnings:
            print(f" - {warning}")
        print()

    # Exit code: 0 = valid (warnings tolerated unless --strict), 1 = failed.
    if is_valid and not warnings:
        print("✅ CodeQL configuration is valid and matches repository contents")
        return 0
    elif is_valid:
        print("✅ CodeQL configuration is valid (with warnings)")
        if args.strict:
            print("❌ Strict mode enabled: treating warnings as errors")
            return 1
        return 0
    else:
        print("❌ CodeQL configuration validation failed")
        return 1


if __name__ == '__main__':
    sys.exit(main())

View File

@@ -0,0 +1,407 @@
#!/usr/bin/env python3
"""
Repository Structure Validator (XML/JSON Support)
Validates repository structure against XML or JSON schema definitions.
Checks for required files, directories, validates naming conventions, and enforces
requirement statuses (required, suggested, optional, not-allowed).
Supports both XML and JSON schema formats for maximum flexibility.
Usage:
python3 validate_structure_v2.py [--schema SCHEMA_FILE] [--format xml|json|auto] [--repo-path PATH]
Examples:
# Auto-detect format from file extension
python3 validate_structure_v2.py --schema scripts/definitions/default-repository.xml
python3 validate_structure_v2.py --schema scripts/definitions/default-repository.json
# Explicit format specification
python3 validate_structure_v2.py --schema my-schema.txt --format json --repo-path /path/to/repo
Exit codes:
0: Success (all validations passed)
1: Validation errors found (required items missing or not-allowed items present)
2: Validation warnings (suggested items missing)
3: Configuration error (invalid schema, missing files, etc.)
"""
import sys
import os
import argparse
import xml.etree.ElementTree as ET
import json
from pathlib import Path
from typing import List, Dict, Tuple, Optional, Any
from dataclasses import dataclass
from enum import Enum
class Severity(Enum):
    """Severity level attached to each validation finding."""

    ERROR = "error"      # required item missing or not-allowed item present
    WARNING = "warning"  # suggested item missing
    INFO = "info"        # item present; reported for completeness
class RequirementStatus(Enum):
    """How strongly the schema requires an item to be present.

    Values match the ``requirement-status`` / ``requirementStatus`` strings
    used in the XML and JSON schema files.
    """

    REQUIRED = "required"        # missing item is an error
    SUGGESTED = "suggested"      # missing item is a warning
    OPTIONAL = "optional"        # presence is informational only
    NOT_ALLOWED = "not-allowed"  # presence is an error
@dataclass
class ValidationResult:
    """Result of a validation check.

    One instance is produced per schema item inspected by
    RepositoryStructureValidator (files and directories alike).
    """
    severity: Severity  # error / warning / info classification of the finding
    message: str  # human-readable description of the finding
    path: str  # repository-relative path of the checked item
    requirement_status: Optional[RequirementStatus] = None  # schema status that produced the finding, if any
    rule_type: Optional[str] = None  # NOTE(review): never populated by the validators in this file — confirm before relying on it
class RepositoryStructureValidator:
    """Validates repository structure against XML or JSON definition.

    The schema is parsed once in ``__init__`` and normalized into a plain
    dictionary (``self.structure_data``), so XML and JSON schemas share the
    same validation code path afterwards. Findings are accumulated in
    ``self.results`` by :meth:`validate` and reported by
    :meth:`print_results`.
    """
    def __init__(self, schema_path: str, repo_path: str = ".", schema_format: str = "auto"):
        """
        Initialize validator and load the schema.

        Args:
            schema_path: Path to schema definition (XML or JSON)
            repo_path: Path to repository to validate (default: current directory)
            schema_format: Format of schema file ('xml', 'json', or 'auto' for auto-detection)
        """
        self.schema_path = schema_path
        self.repo_path = Path(repo_path).resolve()
        self.results: List[ValidationResult] = []  # findings accumulated by validate()
        self.schema_format = schema_format
        self.structure_data = None  # normalized schema dict, set by the loaders below
        # Determine format
        if self.schema_format == "auto":
            self.schema_format = self._detect_format()
        # Load schema
        try:
            if self.schema_format == "xml":
                self._load_xml_schema()
            elif self.schema_format == "json":
                self._load_json_schema()
            else:
                raise ValueError(f"Unsupported schema format: {self.schema_format}")
        except Exception as e:
            # Any schema problem terminates the whole process with exit
            # code 3 (configuration error, per the module docstring).
            print(f"Error loading schema: {e}", file=sys.stderr)
            sys.exit(3)
    def _detect_format(self) -> str:
        """Auto-detect schema format from file extension, then from content.

        Raises:
            ValueError: If neither the extension nor the leading content
                identifies the format.
        """
        ext = Path(self.schema_path).suffix.lower()
        if ext == ".json":
            return "json"
        elif ext in [".xml", ""]:
            # Extensionless files are assumed to be XML.
            return "xml"
        else:
            # Try to detect from content
            try:
                with open(self.schema_path, 'r') as f:
                    content = f.read().strip()
                    if content.startswith('{') or content.startswith('['):
                        return "json"
                    elif content.startswith('<?xml') or content.startswith('<'):
                        return "xml"
            except Exception:
                pass
            # Unable to detect format
            raise ValueError(f"Unable to detect schema format for {self.schema_path}")
    def _load_xml_schema(self):
        """Load XML schema and normalize it into self.structure_data."""
        self.tree = ET.parse(self.schema_path)
        self.root = self.tree.getroot()
        # All schema elements are expected under this fixed namespace.
        self.namespace = {'rs': 'http://mokoconsulting.com/schemas/repository-structure'}
        self.structure_data = self._parse_xml_to_dict()
    def _load_json_schema(self):
        """Load JSON schema directly into self.structure_data."""
        with open(self.schema_path, 'r') as f:
            self.structure_data = json.load(f)
    def _parse_xml_to_dict(self) -> Dict[str, Any]:
        """Convert XML structure to dictionary format for unified processing.

        The resulting shape mirrors the JSON schema layout:
        {'metadata': {...}, 'structure': {'rootFiles': [...], 'directories': [...]}}.
        """
        structure = {}
        # Parse metadata
        metadata_elem = self.root.find('rs:metadata', self.namespace)
        if metadata_elem is not None:
            structure['metadata'] = {
                'name': self._get_element_text(metadata_elem, 'name'),
                'description': self._get_element_text(metadata_elem, 'description'),
                'repositoryType': self._get_element_text(metadata_elem, 'repository-type'),
                'platform': self._get_element_text(metadata_elem, 'platform'),
            }
        # Parse structure
        structure_elem = self.root.find('rs:structure', self.namespace)
        if structure_elem is not None:
            structure['structure'] = {}
            # Parse root files
            root_files_elem = structure_elem.find('rs:root-files', self.namespace)
            if root_files_elem is not None:
                structure['structure']['rootFiles'] = []
                for file_elem in root_files_elem.findall('rs:file', self.namespace):
                    structure['structure']['rootFiles'].append(self._parse_xml_file(file_elem))
            # Parse directories
            directories_elem = structure_elem.find('rs:directories', self.namespace)
            if directories_elem is not None:
                structure['structure']['directories'] = []
                for dir_elem in directories_elem.findall('rs:directory', self.namespace):
                    structure['structure']['directories'].append(self._parse_xml_directory(dir_elem))
        return structure
    def _parse_xml_file(self, file_elem) -> Dict[str, Any]:
        """Parse XML file element to dictionary (None-valued keys are dropped)."""
        file_data = {
            'name': self._get_element_text(file_elem, 'name'),
            'description': self._get_element_text(file_elem, 'description'),
            'requirementStatus': self._get_element_text(file_elem, 'requirement-status', 'required'),
            'audience': self._get_element_text(file_elem, 'audience'),
            'template': self._get_element_text(file_elem, 'template'),
        }
        # Handle extension attribute
        if 'extension' in file_elem.attrib:
            file_data['extension'] = file_elem.attrib['extension']
        return {k: v for k, v in file_data.items() if v is not None}
    def _parse_xml_directory(self, dir_elem) -> Dict[str, Any]:
        """Parse XML directory element to dictionary, recursing into children."""
        dir_data = {
            'name': self._get_element_text(dir_elem, 'name'),
            'path': dir_elem.attrib.get('path'),
            'description': self._get_element_text(dir_elem, 'description'),
            'requirementStatus': self._get_element_text(dir_elem, 'requirement-status', 'required'),
            'purpose': self._get_element_text(dir_elem, 'purpose'),
        }
        # Parse files within directory
        files_elem = dir_elem.find('rs:files', self.namespace)
        if files_elem is not None:
            dir_data['files'] = []
            for file_elem in files_elem.findall('rs:file', self.namespace):
                dir_data['files'].append(self._parse_xml_file(file_elem))
        # Parse subdirectories
        subdirs_elem = dir_elem.find('rs:subdirectories', self.namespace)
        if subdirs_elem is not None:
            dir_data['subdirectories'] = []
            for subdir_elem in subdirs_elem.findall('rs:directory', self.namespace):
                dir_data['subdirectories'].append(self._parse_xml_directory(subdir_elem))
        return {k: v for k, v in dir_data.items() if v is not None}
    def _get_element_text(self, parent, tag_name, default=None):
        """Get text content of a namespaced XML child element.

        Only meaningful for XML schemas; returns *default* unconditionally
        when the schema was loaded from JSON.
        """
        if self.schema_format == "xml":
            elem = parent.find(f'rs:{tag_name}', self.namespace)
            return elem.text if elem is not None else default
        return default
    def validate(self) -> List[ValidationResult]:
        """
        Run all validation checks against the repository on disk.

        Resets ``self.results`` and walks the normalized schema: root files
        first, then each top-level directory (recursively).

        Returns:
            List of validation results
        """
        self.results = []
        print(f"Validating repository: {self.repo_path}")
        print(f"Against schema: {self.schema_path} (format: {self.schema_format})")
        print("-" * 80)
        # Validate root files
        if 'structure' in self.structure_data and 'rootFiles' in self.structure_data['structure']:
            for file_def in self.structure_data['structure']['rootFiles']:
                self._validate_file(file_def, self.repo_path)
        # Validate directories
        if 'structure' in self.structure_data and 'directories' in self.structure_data['structure']:
            for dir_def in self.structure_data['structure']['directories']:
                self._validate_directory(dir_def, self.repo_path)
        return self.results
    def _validate_file(self, file_def: Dict[str, Any], parent_path: Path):
        """Validate a file requirement and append a result to self.results.

        A missing OPTIONAL file produces no finding at all; any existing
        file is recorded as INFO unless its status is not-allowed.
        NOTE(review): an unknown requirementStatus string raises ValueError
        here rather than being reported as a schema error — confirm intended.
        """
        file_name = file_def.get('name')
        requirement_status = RequirementStatus(file_def.get('requirementStatus', 'required'))
        file_path = parent_path / file_name
        exists = file_path.exists() and file_path.is_file()
        if requirement_status == RequirementStatus.REQUIRED and not exists:
            self.results.append(ValidationResult(
                severity=Severity.ERROR,
                message=f"Required file missing: {file_name}",
                path=str(file_path.relative_to(self.repo_path)),
                requirement_status=requirement_status
            ))
        elif requirement_status == RequirementStatus.SUGGESTED and not exists:
            self.results.append(ValidationResult(
                severity=Severity.WARNING,
                message=f"Suggested file missing: {file_name}",
                path=str(file_path.relative_to(self.repo_path)),
                requirement_status=requirement_status
            ))
        elif requirement_status == RequirementStatus.NOT_ALLOWED and exists:
            self.results.append(ValidationResult(
                severity=Severity.ERROR,
                message=f"Not-allowed file present: {file_name} (should not be committed)",
                path=str(file_path.relative_to(self.repo_path)),
                requirement_status=requirement_status
            ))
        elif exists:
            self.results.append(ValidationResult(
                severity=Severity.INFO,
                message=f"File present: {file_name}",
                path=str(file_path.relative_to(self.repo_path)),
                requirement_status=requirement_status
            ))
    def _validate_directory(self, dir_def: Dict[str, Any], parent_path: Path):
        """Validate a directory requirement, then recurse into its contents.

        The directory is located via its schema 'path' (falling back to
        'name') relative to the repository root — note this intentionally
        uses self.repo_path, not *parent_path*, for the directory itself;
        nested files/subdirectories are resolved relative to dir_path.
        """
        dir_name = dir_def.get('name')
        dir_path_str = dir_def.get('path', dir_name)
        requirement_status = RequirementStatus(dir_def.get('requirementStatus', 'required'))
        dir_path = self.repo_path / dir_path_str
        exists = dir_path.exists() and dir_path.is_dir()
        if requirement_status == RequirementStatus.REQUIRED and not exists:
            self.results.append(ValidationResult(
                severity=Severity.ERROR,
                message=f"Required directory missing: {dir_name}",
                path=dir_path_str,
                requirement_status=requirement_status
            ))
            return  # Skip validating contents if directory doesn't exist
        elif requirement_status == RequirementStatus.SUGGESTED and not exists:
            self.results.append(ValidationResult(
                severity=Severity.WARNING,
                message=f"Suggested directory missing: {dir_name}",
                path=dir_path_str,
                requirement_status=requirement_status
            ))
            return
        elif requirement_status == RequirementStatus.NOT_ALLOWED and exists:
            self.results.append(ValidationResult(
                severity=Severity.ERROR,
                message=f"Not-allowed directory present: {dir_name} (should not be committed)",
                path=dir_path_str,
                requirement_status=requirement_status
            ))
            return
        elif exists:
            self.results.append(ValidationResult(
                severity=Severity.INFO,
                message=f"Directory present: {dir_name}",
                path=dir_path_str,
                requirement_status=requirement_status
            ))
        # Validate files within directory
        if exists and 'files' in dir_def:
            for file_def in dir_def['files']:
                self._validate_file(file_def, dir_path)
        # Validate subdirectories
        if exists and 'subdirectories' in dir_def:
            for subdir_def in dir_def['subdirectories']:
                self._validate_directory(subdir_def, dir_path)
    def print_results(self):
        """Print validation results grouped by severity.

        Returns:
            Tuple of (error count, warning count) for exit-code decisions.
        """
        errors = [r for r in self.results if r.severity == Severity.ERROR]
        warnings = [r for r in self.results if r.severity == Severity.WARNING]
        infos = [r for r in self.results if r.severity == Severity.INFO]
        print("\n" + "=" * 80)
        print("VALIDATION RESULTS")
        print("=" * 80)
        if errors:
            print(f"\n❌ ERRORS ({len(errors)}):")
            for result in errors:
                print(f" {result.path}: {result.message}")
        if warnings:
            print(f"\n⚠️ WARNINGS ({len(warnings)}):")
            for result in warnings:
                print(f" {result.path}: {result.message}")
        if infos:
            print(f"\n✓ INFO ({len(infos)} items validated successfully)")
        print("\n" + "=" * 80)
        print(f"Summary: {len(errors)} errors, {len(warnings)} warnings, {len(infos)} info")
        print("=" * 80)
        return len(errors), len(warnings)
def main():
    """CLI entry point: parse arguments, validate, and exit with a status code."""
    parser = argparse.ArgumentParser(
        description='Validate repository structure against XML or JSON schema',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=__doc__
    )
    parser.add_argument('--schema', default='scripts/definitions/default-repository.xml',
                        help='Path to schema file (XML or JSON). Default: scripts/definitions/default-repository.xml')
    parser.add_argument('--format', choices=['xml', 'json', 'auto'], default='auto',
                        help='Schema format (xml, json, or auto-detect). Default: auto')
    parser.add_argument('--repo-path', default='.',
                        help='Path to repository to validate. Default: current directory')
    args = parser.parse_args()

    validator = RepositoryStructureValidator(
        schema_path=args.schema,
        repo_path=args.repo_path,
        schema_format=args.format
    )
    validator.validate()
    errors, warnings = validator.print_results()

    # Exit code contract: 1 = errors found, 2 = warnings only, 0 = clean.
    if errors > 0:
        sys.exit(1)
    elif warnings > 0:
        sys.exit(2)
    else:
        sys.exit(0)


if __name__ == '__main__':
    main()