chore: sync workflows, scripts, and configurations from MokoStandards
This commit is contained in:
533
scripts/validate/auto_detect_platform.py
Executable file
533
scripts/validate/auto_detect_platform.py
Executable file
@@ -0,0 +1,533 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Auto-Detect Repository Platform v03.00.00 - Critical Validator Infrastructure.
|
||||
|
||||
This script automatically detects repository platform types with confidence scoring
|
||||
and provides JSON/CLI output for automation workflows.
|
||||
|
||||
Platform detection capabilities:
|
||||
- Joomla/WaaS components (manifest patterns, version detection)
|
||||
- Dolibarr/CRM modules (module.php, core/ structure)
|
||||
- Generic repositories (fallback with confidence scoring)
|
||||
|
||||
Usage:
|
||||
python3 auto_detect_platform.py [--repo-path PATH] [--json] [--verbose] [--cache]
|
||||
|
||||
Examples:
|
||||
# Auto-detect current repository with JSON output
|
||||
python3 auto_detect_platform.py --json
|
||||
|
||||
# Detect specific repository with caching
|
||||
python3 auto_detect_platform.py --repo-path /path/to/repo --cache --verbose
|
||||
|
||||
# JSON output for CI/CD automation
|
||||
python3 auto_detect_platform.py --json | jq '.platform_type'
|
||||
|
||||
Exit codes:
|
||||
0: Success (platform detected successfully)
|
||||
1: Detection failed (no platform could be determined)
|
||||
2: Configuration error (invalid arguments or paths)
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import hashlib
|
||||
import json
|
||||
import os
|
||||
import pickle
|
||||
import sys
|
||||
import xml.etree.ElementTree as ET
|
||||
from dataclasses import dataclass, asdict
|
||||
from enum import Enum
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Optional, Tuple
|
||||
|
||||
|
||||
# Version of this validator script (MAJOR.MINOR.PATCH, zero-padded per project convention).
__version__ = "03.00.00"
|
||||
|
||||
|
||||
class PlatformType(Enum):
    """Repository platform types enumeration."""

    # Joomla/WaaS extension repository (manifest-based detection).
    JOOMLA = "joomla"
    # Dolibarr ERP/CRM module repository (module descriptor detection).
    DOLIBARR = "dolibarr"
    # Fallback when no platform-specific markers are found.
    GENERIC = "generic"
|
||||
|
||||
|
||||
@dataclass
class DetectionResult:
    """Platform detection result with confidence scoring.

    Attributes:
        platform_type: Detected platform type enum value.
        confidence: Confidence score from 0-100.
        indicators: List of human-readable detection indicators found.
        metadata: Additional platform-specific metadata (string keys/values).
    """

    # Forward reference keeps the annotation lazy; resolves to the
    # PlatformType enum defined in this module.
    platform_type: "PlatformType"
    confidence: int
    indicators: List[str]
    metadata: Dict[str, str]

    def to_dict(self) -> Dict[str, object]:
        """Convert detection result to dictionary for JSON serialization.

        Returns:
            Dictionary representation with platform_type as its string value.
        """
        # Fix: the original annotated this as Dict[str, any], which used the
        # builtin any() function as a type; values are heterogeneous, so
        # `object` is the honest value type.
        return {
            "platform_type": self.platform_type.value,
            "confidence": self.confidence,
            "indicators": self.indicators,
            "metadata": self.metadata,
        }
|
||||
|
||||
|
||||
class DetectionCache:
    """Simple file-based cache for platform detection results.

    Each repository gets one pickle file in the cache directory, keyed by a
    hash of its absolute path, so repeated detections of the same repository
    can skip re-scanning.  All cache operations are best-effort: corrupt or
    unreadable entries behave like cache misses.
    """

    def __init__(self, cache_dir: Optional[Path] = None) -> None:
        """Initialize detection cache.

        Args:
            cache_dir: Directory for cache files. Defaults to ~/.cache/mokostudios.
        """
        self.cache_dir = (
            cache_dir
            if cache_dir is not None
            else Path.home() / ".cache" / "mokostudios" / "platform_detection"
        )
        self.cache_dir.mkdir(parents=True, exist_ok=True)

    def _get_cache_key(self, repo_path: Path) -> str:
        """Return the SHA256 hex digest of the repository path.

        Args:
            repo_path: Absolute path to repository.

        Returns:
            SHA256 hash of the repository path as a hex string.
        """
        return hashlib.sha256(str(repo_path).encode()).hexdigest()

    def _entry_path(self, repo_path: Path) -> Path:
        """Return the on-disk cache file location for *repo_path*."""
        return self.cache_dir / f"{self._get_cache_key(repo_path)}.pkl"

    def get(self, repo_path: Path) -> Optional[DetectionResult]:
        """Retrieve a cached detection result.

        Args:
            repo_path: Path to repository.

        Returns:
            Cached DetectionResult if present and readable, None otherwise.
        """
        entry = self._entry_path(repo_path)
        try:
            # A missing file raises FileNotFoundError (an OSError) and a
            # truncated one raises EOFError — both are treated as misses.
            return pickle.loads(entry.read_bytes())
        except (pickle.PickleError, OSError, EOFError):
            return None

    def set(self, repo_path: Path, result: DetectionResult) -> None:
        """Store a detection result in the cache (failures are ignored).

        Args:
            repo_path: Path to repository.
            result: Detection result to cache.
        """
        try:
            self._entry_path(repo_path).write_bytes(pickle.dumps(result))
        except (pickle.PickleError, OSError):
            # Caching is purely an optimization; never let it break detection.
            pass

    def clear(self) -> None:
        """Delete every cached detection result file."""
        for entry in self.cache_dir.glob("*.pkl"):
            try:
                entry.unlink()
            except OSError:
                pass
|
||||
|
||||
|
||||
class PlatformDetector:
    """Detects repository platform type with enhanced detection algorithms.

    Provides platform detection for Joomla, Dolibarr, and generic repositories
    with confidence scoring and detailed indicators.
    """

    def __init__(self, repo_path: Path, use_cache: bool = False) -> None:
        """Initialize platform detector.

        Args:
            repo_path: Path to repository to analyze.
            use_cache: Enable caching for performance optimization.

        Raises:
            ValueError: If the resolved repository path does not exist.
        """
        self.repo_path = Path(repo_path).resolve()
        self.use_cache = use_cache
        self.cache = DetectionCache() if use_cache else None

        if not self.repo_path.exists():
            raise ValueError(f"Repository path does not exist: {self.repo_path}")

    def _maybe_cache(self, result: DetectionResult) -> None:
        """Store *result* in the cache when caching is enabled."""
        if self.use_cache and self.cache:
            self.cache.set(self.repo_path, result)

    def detect(self) -> DetectionResult:
        """Detect repository platform type.

        Executes platform-specific detection methods in order:
        1. Joomla detection (manifest patterns, directory structure)
        2. Dolibarr detection (module.php, core/ structure)
        3. Generic fallback (confidence-based scoring)

        A platform-specific result is accepted as soon as its confidence
        reaches 50; otherwise the generic fallback is returned.

        Returns:
            DetectionResult with platform type and confidence score.
        """
        if self.use_cache and self.cache:
            cached_result = self.cache.get(self.repo_path)
            if cached_result:
                return cached_result

        joomla_result = self._detect_joomla()
        if joomla_result.confidence >= 50:
            self._maybe_cache(joomla_result)
            return joomla_result

        dolibarr_result = self._detect_dolibarr()
        if dolibarr_result.confidence >= 50:
            self._maybe_cache(dolibarr_result)
            return dolibarr_result

        generic_result = self._detect_generic()
        self._maybe_cache(generic_result)
        return generic_result

    def _detect_joomla(self) -> DetectionResult:
        """Detect Joomla component with enhanced manifest pattern matching.

        Detection criteria:
        - XML manifest files with <extension> or <install> root tags (+50)
        - Joomla version tags in manifest (+10)
        - Directory structure: site/, admin/, administrator/ (+15 each)
        - Language directories (language/en-GB/) (+10)
        - media/ directory containing CSS assets (+5)

        Returns:
            DetectionResult for Joomla platform with confidence score
            capped at 100.
        """
        confidence = 0
        indicators: List[str] = []
        metadata: Dict[str, str] = {}

        skip_dirs = {".git", "vendor", "node_modules", ".github"}

        for xml_file in self.repo_path.glob("**/*.xml"):
            # Ignore manifests inside vendored/VCS directories.
            if any(skip_dir in xml_file.parts for skip_dir in skip_dirs):
                continue

            try:
                tree = ET.parse(xml_file)
                root = tree.getroot()

                if root.tag in ["extension", "install"]:
                    ext_type = root.get("type", "")

                    if ext_type in ["component", "module", "plugin", "library", "template", "file"]:
                        confidence += 50
                        rel_path = xml_file.relative_to(self.repo_path)
                        indicators.append(f"Joomla manifest: {rel_path} (type={ext_type})")
                        metadata["manifest_file"] = str(rel_path)
                        metadata["extension_type"] = ext_type

                        version_elem = root.find("version")
                        if version_elem is not None and version_elem.text:
                            confidence += 10
                            metadata["version"] = version_elem.text.strip()
                            indicators.append(f"Joomla version tag: {version_elem.text.strip()}")

                        name_elem = root.find("name")
                        if name_elem is not None and name_elem.text:
                            metadata["extension_name"] = name_elem.text.strip()

                        # First valid manifest wins; stop scanning.
                        break

            except (ET.ParseError, OSError):
                # Unparseable XML is simply not a Joomla manifest.
                continue

        joomla_dirs = ["site", "admin", "administrator"]
        for dir_name in joomla_dirs:
            if (self.repo_path / dir_name).is_dir():
                confidence += 15
                indicators.append(f"Joomla directory structure: {dir_name}/")

        if (self.repo_path / "language" / "en-GB").exists():
            confidence += 10
            indicators.append("Joomla language directory: language/en-GB/")

        media_dir = self.repo_path / "media"
        if media_dir.is_dir() and list(media_dir.glob("**/*.css")):
            confidence += 5
            indicators.append("Joomla media directory with assets")

        confidence = min(confidence, 100)

        return DetectionResult(
            platform_type=PlatformType.JOOMLA,
            confidence=confidence,
            indicators=indicators,
            metadata=metadata
        )

    def _detect_dolibarr(self) -> DetectionResult:
        """Detect Dolibarr module with enhanced structure analysis.

        Detection criteria:
        - Module descriptor files (mod*.class.php) matching at least three
          known Dolibarr code patterns (+60)
        - Directory structure: core/modules, sql, class, lib, langs (+8 each)
        - SQL migration files in sql/ (+10)

        Returns:
            DetectionResult for Dolibarr platform with confidence score
            capped at 100.
        """
        # Hoisted out of the scan loop; re is only needed by this method.
        import re

        confidence = 0
        indicators: List[str] = []
        metadata: Dict[str, str] = {}

        descriptor_patterns = ["**/mod*.class.php", "**/core/modules/**/*.php"]
        skip_dirs = {".git", "vendor", "node_modules"}

        for pattern in descriptor_patterns:
            for php_file in self.repo_path.glob(pattern):
                if any(skip_dir in php_file.parts for skip_dir in skip_dirs):
                    continue

                try:
                    content = php_file.read_text(encoding="utf-8", errors="ignore")

                    dolibarr_patterns = [
                        "extends DolibarrModules",
                        "class mod",
                        "$this->numero",
                        "$this->rights_class",
                        "DolibarrModules",
                        "dol_include_once"
                    ]

                    pattern_matches = sum(1 for p in dolibarr_patterns if p in content)

                    # Require several markers so random PHP doesn't qualify.
                    if pattern_matches >= 3:
                        confidence += 60
                        rel_path = php_file.relative_to(self.repo_path)
                        indicators.append(f"Dolibarr module descriptor: {rel_path}")
                        metadata["descriptor_file"] = str(rel_path)

                        if "class mod" in content:
                            match = re.search(r'class\s+(mod\w+)', content)
                            if match:
                                metadata["module_class"] = match.group(1)

                        # First matching descriptor wins for this pattern.
                        break

                except (OSError, UnicodeDecodeError):
                    continue

        dolibarr_dirs = ["core/modules", "sql", "class", "lib", "langs"]
        for dir_name in dolibarr_dirs:
            dir_path = self.repo_path / dir_name
            if dir_path.exists():
                confidence += 8
                indicators.append(f"Dolibarr directory structure: {dir_name}/")

        sql_dir = self.repo_path / "sql"
        if sql_dir.is_dir():
            sql_files = list(sql_dir.glob("*.sql"))
            if sql_files:
                confidence += 10
                indicators.append(f"Dolibarr SQL files: {len(sql_files)} migration scripts")
                metadata["sql_files_count"] = str(len(sql_files))

        confidence = min(confidence, 100)

        return DetectionResult(
            platform_type=PlatformType.DOLIBARR,
            confidence=confidence,
            indicators=indicators,
            metadata=metadata
        )

    def _detect_generic(self) -> DetectionResult:
        """Fallback detection for generic repositories with confidence scoring.

        Provides baseline detection when no specific platform is identified.
        Starts at confidence 50 and adds small bonuses for standard repository
        files (+5 each) and directories (+3 each), capped at 100.

        Returns:
            DetectionResult for generic platform with confidence score.
        """
        confidence = 50
        indicators: List[str] = ["No platform-specific markers found"]
        metadata: Dict[str, str] = {
            "checked_platforms": "Joomla, Dolibarr",
            "detection_reason": "Generic repository fallback"
        }

        standard_files = ["README.md", "LICENSE", ".gitignore", "composer.json", "package.json"]
        found_files = []

        for file_name in standard_files:
            if (self.repo_path / file_name).exists():
                found_files.append(file_name)
                confidence += 5

        if found_files:
            indicators.append(f"Standard repository files: {', '.join(found_files)}")

        standard_dirs = ["src", "tests", "docs", ".github"]
        found_dirs = []

        for dir_name in standard_dirs:
            if (self.repo_path / dir_name).is_dir():
                found_dirs.append(dir_name)
                confidence += 3

        if found_dirs:
            indicators.append(f"Standard directory structure: {', '.join(found_dirs)}")

        confidence = min(confidence, 100)

        return DetectionResult(
            platform_type=PlatformType.GENERIC,
            confidence=confidence,
            indicators=indicators,
            metadata=metadata
        )
|
||||
|
||||
|
||||
def main() -> int:
    """Main entry point for platform detection CLI.

    Parses arguments, optionally clears the cache, runs detection, and emits
    either human-readable or JSON output.

    Returns:
        Exit code: 0 for success, 1 for detection failure, 2 for config error.
    """
    parser = argparse.ArgumentParser(
        description=f"Auto-detect repository platform v{__version__}",
        epilog="For more information, see docs/scripts/validate/"
    )
    parser.add_argument(
        "--repo-path",
        type=str,
        default=".",
        help="Path to repository to analyze (default: current directory)"
    )
    parser.add_argument(
        "--json",
        action="store_true",
        help="Output results in JSON format for automation"
    )
    parser.add_argument(
        "--verbose",
        action="store_true",
        help="Enable verbose output with detailed indicators"
    )
    parser.add_argument(
        "--cache",
        action="store_true",
        help="Enable caching for performance (stores results in ~/.cache/mokostudios)"
    )
    parser.add_argument(
        "--clear-cache",
        action="store_true",
        help="Clear detection cache and exit"
    )
    parser.add_argument(
        "--version",
        action="version",
        version=f"%(prog)s {__version__}"
    )

    args = parser.parse_args()

    # --clear-cache is a standalone maintenance action: no detection is run.
    if args.clear_cache:
        cache = DetectionCache()
        cache.clear()
        if not args.json:
            print("✓ Detection cache cleared")
        return 0

    try:
        repo_path = Path(args.repo_path).resolve()

        # Validate the path up front so we can return the config-error code (2)
        # instead of surfacing it as a detection failure.
        if not repo_path.exists():
            if args.json:
                print(json.dumps({"error": "Repository path does not exist", "path": str(repo_path)}))
            else:
                print(f"✗ Error: Repository path does not exist: {repo_path}", file=sys.stderr)
            return 2

        detector = PlatformDetector(repo_path, use_cache=args.cache)
        result = detector.detect()

        if args.json:
            # JSON mode: emit only the machine-readable document on stdout.
            output = result.to_dict()
            output["repo_path"] = str(repo_path)
            output["version"] = __version__
            print(json.dumps(output, indent=2))
        else:
            # Human-readable report.
            print("=" * 70)
            print(f"Platform Auto-Detection v{__version__}")
            print("=" * 70)
            print()
            print(f"📁 Repository: {repo_path}")
            print(f"🔍 Platform: {result.platform_type.value.upper()}")
            print(f"📊 Confidence: {result.confidence}%")
            print()

            if args.verbose and result.indicators:
                print("Detection Indicators:")
                for indicator in result.indicators:
                    print(f"  • {indicator}")
                print()

            if args.verbose and result.metadata:
                print("Metadata:")
                for key, value in result.metadata.items():
                    print(f"  {key}: {value}")
                print()

            if args.cache:
                print("💾 Result cached for future runs")
                print()

            print("=" * 70)

        return 0

    except ValueError as e:
        # Raised by PlatformDetector for invalid repository paths → config error.
        if args.json:
            print(json.dumps({"error": str(e)}))
        else:
            print(f"✗ Error: {e}", file=sys.stderr)
        return 2
    except Exception as e:
        # Last-resort handler so automation always gets a well-formed exit code.
        if args.json:
            print(json.dumps({"error": f"Unexpected error: {str(e)}"}))
        else:
            print(f"✗ Unexpected error: {e}", file=sys.stderr)
        return 1
|
||||
|
||||
|
||||
# Script entry point: propagate main()'s exit code to the shell.
if __name__ == "__main__":
    sys.exit(main())
|
||||
258
scripts/validate/validate_codeql_config.py
Executable file
258
scripts/validate/validate_codeql_config.py
Executable file
@@ -0,0 +1,258 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Copyright (C) 2026 Moko Consulting <hello@mokoconsulting.tech>
|
||||
|
||||
This file is part of a Moko Consulting project.
|
||||
|
||||
SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
FILE INFORMATION
|
||||
DEFGROUP: MokoStandards.Scripts.Validate
|
||||
INGROUP: MokoStandards
|
||||
REPO: https://github.com/mokoconsulting-tech/MokoStandards
|
||||
PATH: /scripts/validate/validate_codeql_config.py
|
||||
VERSION: 01.00.00
|
||||
BRIEF: Validates CodeQL workflow language configuration matches repository contents
|
||||
"""
|
||||
|
||||
import argparse
import sys
from pathlib import Path
from typing import Dict, List, Optional, Set, Tuple

try:
    import yaml
except ImportError:
    print("Error: PyYAML is required. Install with: pip install pyyaml", file=sys.stderr)
    sys.exit(1)
|
||||
|
||||
|
||||
# Language to file extension mapping.
# Keys are CodeQL language identifiers; values are the file suffixes counted
# as evidence of that language. Note that 'cpp' also claims C sources and
# headers, matching CodeQL's combined C/C++ analysis.
LANGUAGE_EXTENSIONS = {
    'python': {'.py'},
    'javascript': {'.js', '.jsx', '.ts', '.tsx', '.mjs', '.cjs'},
    'php': {'.php'},
    'java': {'.java'},
    'go': {'.go'},
    'ruby': {'.rb'},
    'cpp': {'.cpp', '.cc', '.cxx', '.c', '.h', '.hpp'},
    'csharp': {'.cs'},
}
|
||||
|
||||
|
||||
def detect_languages_in_repo(repo_path: Path, exclude_dirs: Optional[Set[str]] = None) -> Dict[str, int]:
    """
    Detect programming languages present in the repository by scanning file extensions.

    The tree is walked once and each file is classified by its suffix, rather
    than re-walking the whole repository once per known extension.

    Args:
        repo_path: Path to the repository root
        exclude_dirs: Set of directory names to exclude from scanning
            (defaults to common VCS/vendored directories)

    Returns:
        Dictionary mapping language names to file counts; languages with no
        matching files are omitted
    """
    if exclude_dirs is None:
        exclude_dirs = {'.git', 'vendor', 'node_modules', '.venv', 'venv', '__pycache__'}

    # Invert the language table so each suffix maps directly to its language.
    # Suffixes are unique across languages in LANGUAGE_EXTENSIONS.
    ext_to_language = {
        ext: language
        for language, extensions in LANGUAGE_EXTENSIONS.items()
        for ext in extensions
    }

    language_counts: Dict[str, int] = {}

    for file_path in repo_path.rglob('*'):
        # Skip anything inside an excluded directory.
        if any(excluded in file_path.parts for excluded in exclude_dirs):
            continue
        # Path.suffix is the final extension; dotfiles like ".py" have none.
        language = ext_to_language.get(file_path.suffix)
        if language is not None and file_path.is_file():
            language_counts[language] = language_counts.get(language, 0) + 1

    return language_counts
|
||||
|
||||
|
||||
def parse_codeql_workflow(workflow_path: Path) -> Tuple[List[str], bool]:
    """
    Parse CodeQL workflow file and extract configured languages.

    Scans the workflow's jobs for the first one defining a non-empty
    strategy.matrix.language list.

    Args:
        workflow_path: Path to the CodeQL workflow YAML file

    Returns:
        Tuple of (list of configured languages, whether parsing succeeded)
    """
    try:
        with open(workflow_path, 'r') as f:
            workflow = yaml.safe_load(f)

        # Navigate to the first job with a matrix.language configuration.
        # (The job name itself is irrelevant, so iterate values only.)
        for job_config in workflow.get('jobs', {}).values():
            strategy = job_config.get('strategy', {})
            matrix = strategy.get('matrix', {})
            languages = matrix.get('language', [])

            if languages:
                return languages, True

        return [], False
    except Exception as e:
        # Broad catch is deliberate: any read/parse/shape problem is reported
        # as a failed parse so the caller can emit a validation error instead
        # of crashing.
        print(f"Error parsing workflow: {e}", file=sys.stderr)
        return [], False
|
||||
|
||||
|
||||
def validate_codeql_config(repo_path: Path, workflow_path: Path) -> Tuple[bool, List[str], List[str]]:
    """
    Validate that CodeQL workflow languages match repository contents.

    Args:
        repo_path: Path to the repository root
        workflow_path: Path to the CodeQL workflow file

    Returns:
        Tuple of (is_valid, list of errors, list of warnings)
    """
    errors: List[str] = []
    warnings: List[str] = []

    # Guard: the workflow file must exist before anything else is checked.
    if not workflow_path.exists():
        return False, [f"CodeQL workflow not found at: {workflow_path}"], warnings

    # Guard: a repository with no recognized languages is trivially valid.
    detected_languages = detect_languages_in_repo(repo_path)
    if not detected_languages:
        return True, errors, ["No supported programming languages detected in repository"]

    configured_languages, parse_success = parse_codeql_workflow(workflow_path)
    if not parse_success:
        return False, ["Could not find language configuration in CodeQL workflow"], warnings
    if not configured_languages:
        return False, ["No languages configured in CodeQL workflow matrix"], warnings

    detected_set = set(detected_languages)
    configured_set = set(configured_languages)

    # Configured-but-absent languages break the CodeQL run → errors.
    errors.extend(
        f"Language '{lang}' is configured in CodeQL but no {lang.upper()} files found in repository. "
        f"This will cause CodeQL analysis to fail."
        for lang in configured_set - detected_set
    )

    # Present-but-unconfigured languages are merely unscanned → warnings.
    warnings.extend(
        f"Language '{lang}' has {detected_languages[lang]} files in repository but is not configured in CodeQL workflow. "
        f"Consider adding it for security scanning."
        for lang in detected_set - configured_set
    )

    return not errors, errors, warnings
|
||||
|
||||
|
||||
def main() -> int:
    """Main entry point for the validation script.

    Returns:
        Exit code: 0 when the configuration is valid (warnings allowed unless
        --strict), 1 on validation failure or strict-mode warnings.
    """
    parser = argparse.ArgumentParser(
        description='Validate CodeQL workflow language configuration against repository contents'
    )
    parser.add_argument(
        '--repo-path',
        type=Path,
        default=Path('.'),
        help='Path to repository root (default: current directory)'
    )
    parser.add_argument(
        '--workflow-path',
        type=Path,
        help='Path to CodeQL workflow file (default: .github/workflows/codeql-analysis.yml)'
    )
    parser.add_argument(
        '--strict',
        action='store_true',
        help='Treat warnings as errors'
    )

    args = parser.parse_args()

    repo_path = args.repo_path.resolve()
    workflow_path = args.workflow_path

    # Default the workflow location relative to the (resolved) repository.
    if workflow_path is None:
        workflow_path = repo_path / '.github' / 'workflows' / 'codeql-analysis.yml'
    else:
        workflow_path = workflow_path.resolve()

    # Fix: this was an f-string with no placeholders.
    print("Validating CodeQL configuration...")
    print(f"Repository: {repo_path}")
    print(f"Workflow: {workflow_path}")
    print()

    # Detect languages first for informational purposes
    detected_languages = detect_languages_in_repo(repo_path)
    if detected_languages:
        print("Detected languages in repository:")
        for lang, count in sorted(detected_languages.items()):
            print(f"  - {lang}: {count} files")
        print()

    # Validate configuration
    is_valid, errors, warnings = validate_codeql_config(repo_path, workflow_path)

    # Print results
    if errors:
        print("❌ ERRORS:")
        for error in errors:
            print(f"  - {error}")
        print()

    if warnings:
        print("⚠️ WARNINGS:")
        for warning in warnings:
            print(f"  - {warning}")
        print()

    if is_valid and not warnings:
        print("✅ CodeQL configuration is valid and matches repository contents")
        return 0
    elif is_valid:
        print("✅ CodeQL configuration is valid (with warnings)")
        if args.strict:
            print("❌ Strict mode enabled: treating warnings as errors")
            return 1
        return 0
    else:
        print("❌ CodeQL configuration validation failed")
        return 1
|
||||
|
||||
|
||||
# Script entry point: propagate main()'s exit code to the shell.
if __name__ == '__main__':
    sys.exit(main())
|
||||
407
scripts/validate/validate_structure_v2.py
Executable file
407
scripts/validate/validate_structure_v2.py
Executable file
@@ -0,0 +1,407 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Repository Structure Validator (XML/JSON Support)
|
||||
|
||||
Validates repository structure against XML or JSON schema definitions.
|
||||
Checks for required files, directories, validates naming conventions, and enforces
|
||||
requirement statuses (required, suggested, optional, not-allowed).
|
||||
|
||||
Supports both XML and JSON schema formats for maximum flexibility.
|
||||
|
||||
Usage:
|
||||
python3 validate_structure_v2.py [--schema SCHEMA_FILE] [--format xml|json|auto] [--repo-path PATH]
|
||||
|
||||
Examples:
|
||||
# Auto-detect format from file extension
|
||||
python3 validate_structure_v2.py --schema scripts/definitions/default-repository.xml
|
||||
python3 validate_structure_v2.py --schema scripts/definitions/default-repository.json
|
||||
|
||||
# Explicit format specification
|
||||
python3 validate_structure_v2.py --schema my-schema.txt --format json --repo-path /path/to/repo
|
||||
|
||||
Exit codes:
|
||||
0: Success (all validations passed)
|
||||
1: Validation errors found (required items missing or not-allowed items present)
|
||||
2: Validation warnings (suggested items missing)
|
||||
3: Configuration error (invalid schema, missing files, etc.)
|
||||
"""
|
||||
|
||||
import sys
|
||||
import os
|
||||
import argparse
|
||||
import xml.etree.ElementTree as ET
|
||||
import json
|
||||
from pathlib import Path
|
||||
from typing import List, Dict, Tuple, Optional, Any
|
||||
from dataclasses import dataclass
|
||||
from enum import Enum
|
||||
|
||||
|
||||
class Severity(Enum):
    """Validation severity levels"""
    # Blocking problem (required item missing / not-allowed item present).
    ERROR = "error"
    # Non-blocking problem (e.g. suggested item missing).
    WARNING = "warning"
    # Informational finding only.
    INFO = "info"
|
||||
|
||||
|
||||
class RequirementStatus(Enum):
    """Requirement status levels"""
    # Item must exist; absence is an error.
    REQUIRED = "required"
    # Item should exist; absence is a warning.
    SUGGESTED = "suggested"
    # Item may exist; no finding either way.
    OPTIONAL = "optional"
    # Item must NOT exist; presence is an error.
    NOT_ALLOWED = "not-allowed"
|
||||
|
||||
|
||||
@dataclass
class ValidationResult:
    """Result of a validation check"""
    # Severity of the finding (error/warning/info).
    severity: Severity
    # Human-readable description of the finding.
    message: str
    # Path the check was applied to (presumably repository-relative — confirm against callers).
    path: str
    # Requirement status of the schema rule that produced this result, if any.
    requirement_status: Optional[RequirementStatus] = None
    # Schema rule type the finding came from, if known.
    rule_type: Optional[str] = None
|
||||
|
||||
|
||||
class RepositoryStructureValidator:
|
||||
"""Validates repository structure against XML or JSON definition"""
|
||||
|
||||
    def __init__(self, schema_path: str, repo_path: str = ".", schema_format: str = "auto"):
        """
        Initialize validator

        Args:
            schema_path: Path to schema definition (XML or JSON)
            repo_path: Path to repository to validate (default: current directory)
            schema_format: Format of schema file ('xml', 'json', or 'auto' for auto-detection)
        """
        self.schema_path = schema_path
        self.repo_path = Path(repo_path).resolve()
        # Accumulated ValidationResult findings.
        self.results: List[ValidationResult] = []
        self.schema_format = schema_format
        # Unified dict representation of the schema, filled by the loaders.
        self.structure_data = None

        # Determine format
        if self.schema_format == "auto":
            self.schema_format = self._detect_format()

        # Load schema
        try:
            if self.schema_format == "xml":
                self._load_xml_schema()
            elif self.schema_format == "json":
                self._load_json_schema()
            else:
                raise ValueError(f"Unsupported schema format: {self.schema_format}")
        except Exception as e:
            # NOTE(review): exits the whole process (code 3) on any schema
            # problem instead of raising — this constructor is only safe to
            # call from a CLI context.
            print(f"Error loading schema: {e}", file=sys.stderr)
            sys.exit(3)
|
||||
|
||||
    def _detect_format(self) -> str:
        """Auto-detect schema format from file extension.

        Falls back to sniffing the first non-whitespace character of the file
        when the extension is not conclusive.

        Returns:
            'xml' or 'json'.

        Raises:
            ValueError: If the format cannot be determined.
        """
        ext = Path(self.schema_path).suffix.lower()
        if ext == ".json":
            return "json"
        elif ext in [".xml", ""]:
            # Extensionless schema files are assumed to be XML.
            return "xml"
        else:
            # Try to detect from content
            try:
                with open(self.schema_path, 'r') as f:
                    content = f.read().strip()
                    # JSON documents start with an object or array;
                    # XML with a declaration or a tag.
                    if content.startswith('{') or content.startswith('['):
                        return "json"
                    elif content.startswith('<?xml') or content.startswith('<'):
                        return "xml"
            except Exception:
                # Unreadable file: fall through to the error below.
                pass

            # Unable to detect format
            raise ValueError(f"Unable to detect schema format for {self.schema_path}")
|
||||
|
||||
    def _load_xml_schema(self):
        """Load XML schema.

        Parses the XML file and converts it into the same dict shape the JSON
        loader produces, so downstream validation is format-agnostic.
        """
        self.tree = ET.parse(self.schema_path)
        self.root = self.tree.getroot()
        # All schema elements are expected under this XML namespace.
        self.namespace = {'rs': 'http://mokoconsulting.com/schemas/repository-structure'}
        self.structure_data = self._parse_xml_to_dict()
|
||||
|
||||
    def _load_json_schema(self):
        """Load JSON schema.

        The JSON document is used as-is as the unified structure dict.
        """
        with open(self.schema_path, 'r') as f:
            self.structure_data = json.load(f)
|
||||
|
||||
def _parse_xml_to_dict(self) -> Dict[str, Any]:
|
||||
"""Convert XML structure to dictionary format for unified processing"""
|
||||
structure = {}
|
||||
|
||||
# Parse metadata
|
||||
metadata_elem = self.root.find('rs:metadata', self.namespace)
|
||||
if metadata_elem is not None:
|
||||
structure['metadata'] = {
|
||||
'name': self._get_element_text(metadata_elem, 'name'),
|
||||
'description': self._get_element_text(metadata_elem, 'description'),
|
||||
'repositoryType': self._get_element_text(metadata_elem, 'repository-type'),
|
||||
'platform': self._get_element_text(metadata_elem, 'platform'),
|
||||
}
|
||||
|
||||
# Parse structure
|
||||
structure_elem = self.root.find('rs:structure', self.namespace)
|
||||
if structure_elem is not None:
|
||||
structure['structure'] = {}
|
||||
|
||||
# Parse root files
|
||||
root_files_elem = structure_elem.find('rs:root-files', self.namespace)
|
||||
if root_files_elem is not None:
|
||||
structure['structure']['rootFiles'] = []
|
||||
for file_elem in root_files_elem.findall('rs:file', self.namespace):
|
||||
structure['structure']['rootFiles'].append(self._parse_xml_file(file_elem))
|
||||
|
||||
# Parse directories
|
||||
directories_elem = structure_elem.find('rs:directories', self.namespace)
|
||||
if directories_elem is not None:
|
||||
structure['structure']['directories'] = []
|
||||
for dir_elem in directories_elem.findall('rs:directory', self.namespace):
|
||||
structure['structure']['directories'].append(self._parse_xml_directory(dir_elem))
|
||||
|
||||
return structure
|
||||
|
||||
def _parse_xml_file(self, file_elem) -> Dict[str, Any]:
|
||||
"""Parse XML file element to dictionary"""
|
||||
file_data = {
|
||||
'name': self._get_element_text(file_elem, 'name'),
|
||||
'description': self._get_element_text(file_elem, 'description'),
|
||||
'requirementStatus': self._get_element_text(file_elem, 'requirement-status', 'required'),
|
||||
'audience': self._get_element_text(file_elem, 'audience'),
|
||||
'template': self._get_element_text(file_elem, 'template'),
|
||||
}
|
||||
|
||||
# Handle extension attribute
|
||||
if 'extension' in file_elem.attrib:
|
||||
file_data['extension'] = file_elem.attrib['extension']
|
||||
|
||||
return {k: v for k, v in file_data.items() if v is not None}
|
||||
|
||||
def _parse_xml_directory(self, dir_elem) -> Dict[str, Any]:
|
||||
"""Parse XML directory element to dictionary"""
|
||||
dir_data = {
|
||||
'name': self._get_element_text(dir_elem, 'name'),
|
||||
'path': dir_elem.attrib.get('path'),
|
||||
'description': self._get_element_text(dir_elem, 'description'),
|
||||
'requirementStatus': self._get_element_text(dir_elem, 'requirement-status', 'required'),
|
||||
'purpose': self._get_element_text(dir_elem, 'purpose'),
|
||||
}
|
||||
|
||||
# Parse files within directory
|
||||
files_elem = dir_elem.find('rs:files', self.namespace)
|
||||
if files_elem is not None:
|
||||
dir_data['files'] = []
|
||||
for file_elem in files_elem.findall('rs:file', self.namespace):
|
||||
dir_data['files'].append(self._parse_xml_file(file_elem))
|
||||
|
||||
# Parse subdirectories
|
||||
subdirs_elem = dir_elem.find('rs:subdirectories', self.namespace)
|
||||
if subdirs_elem is not None:
|
||||
dir_data['subdirectories'] = []
|
||||
for subdir_elem in subdirs_elem.findall('rs:directory', self.namespace):
|
||||
dir_data['subdirectories'].append(self._parse_xml_directory(subdir_elem))
|
||||
|
||||
return {k: v for k, v in dir_data.items() if v is not None}
|
||||
|
||||
def _get_element_text(self, parent, tag_name, default=None):
|
||||
"""Get text content of XML element"""
|
||||
if self.schema_format == "xml":
|
||||
elem = parent.find(f'rs:{tag_name}', self.namespace)
|
||||
return elem.text if elem is not None else default
|
||||
return default
|
||||
|
||||
def validate(self) -> List[ValidationResult]:
    """Run every structural check against the repository.

    Walks the schema's root-level file requirements and directory trees,
    accumulating outcomes in ``self.results``.

    Returns:
        The accumulated list of validation results.
    """
    self.results = []

    print(f"Validating repository: {self.repo_path}")
    print(f"Against schema: {self.schema_path} (format: {self.schema_format})")
    print("-" * 80)

    data = self.structure_data

    # Root-level file requirements first.
    if 'structure' in data and 'rootFiles' in data['structure']:
        for file_def in data['structure']['rootFiles']:
            self._validate_file(file_def, self.repo_path)

    # Then each declared directory tree.
    if 'structure' in data and 'directories' in data['structure']:
        for dir_def in data['structure']['directories']:
            self._validate_directory(dir_def, self.repo_path)

    return self.results
|
||||
|
||||
def _validate_file(self, file_def: Dict[str, Any], parent_path: Path):
    """Check one file requirement beneath *parent_path* and record the
    outcome in ``self.results``.

    Missing required / not-allowed-but-present files are errors; missing
    suggested files are warnings; present files are recorded as info.
    """
    file_name = file_def.get('name')
    status = RequirementStatus(file_def.get('requirementStatus', 'required'))
    file_path = parent_path / file_name
    exists = file_path.exists() and file_path.is_file()

    if not exists and status == RequirementStatus.REQUIRED:
        severity, message = Severity.ERROR, f"Required file missing: {file_name}"
    elif not exists and status == RequirementStatus.SUGGESTED:
        severity, message = Severity.WARNING, f"Suggested file missing: {file_name}"
    elif exists and status == RequirementStatus.NOT_ALLOWED:
        severity, message = Severity.ERROR, f"Not-allowed file present: {file_name} (should not be committed)"
    elif exists:
        severity, message = Severity.INFO, f"File present: {file_name}"
    else:
        # Remaining combinations (e.g. optional and absent) produce no record.
        return

    self.results.append(ValidationResult(
        severity=severity,
        message=message,
        path=str(file_path.relative_to(self.repo_path)),
        requirement_status=status
    ))
|
||||
|
||||
def _validate_directory(self, dir_def: Dict[str, Any], parent_path: Path):
    """Check one directory requirement and, when the directory exists,
    recurse into its declared files and subdirectories.

    NOTE(review): dir_path is resolved against ``self.repo_path`` rather
    than *parent_path*, so nested ``path``/``name`` values are treated as
    repo-relative — confirm this matches the schema's intent.
    """
    dir_name = dir_def.get('name')
    dir_path_str = dir_def.get('path', dir_name)
    status = RequirementStatus(dir_def.get('requirementStatus', 'required'))
    dir_path = self.repo_path / dir_path_str
    exists = dir_path.exists() and dir_path.is_dir()

    if not exists:
        if status == RequirementStatus.REQUIRED:
            self.results.append(ValidationResult(
                severity=Severity.ERROR,
                message=f"Required directory missing: {dir_name}",
                path=dir_path_str,
                requirement_status=status
            ))
        elif status == RequirementStatus.SUGGESTED:
            self.results.append(ValidationResult(
                severity=Severity.WARNING,
                message=f"Suggested directory missing: {dir_name}",
                path=dir_path_str,
                requirement_status=status
            ))
        # Nothing inside a missing directory can be validated.
        return

    if status == RequirementStatus.NOT_ALLOWED:
        self.results.append(ValidationResult(
            severity=Severity.ERROR,
            message=f"Not-allowed directory present: {dir_name} (should not be committed)",
            path=dir_path_str,
            requirement_status=status
        ))
        return

    self.results.append(ValidationResult(
        severity=Severity.INFO,
        message=f"Directory present: {dir_name}",
        path=dir_path_str,
        requirement_status=status
    ))

    # Recurse into declared contents of an existing, allowed directory.
    if 'files' in dir_def:
        for file_def in dir_def['files']:
            self._validate_file(file_def, dir_path)
    if 'subdirectories' in dir_def:
        for subdir_def in dir_def['subdirectories']:
            self._validate_directory(subdir_def, dir_path)
|
||||
|
||||
def print_results(self):
    """Print a human-readable summary of ``self.results`` grouped by
    severity.

    Returns:
        Tuple of ``(error_count, warning_count)`` for exit-code decisions.
    """
    # Bucket results by severity in a single pass.
    errors, warnings, infos = [], [], []
    for result in self.results:
        if result.severity == Severity.ERROR:
            errors.append(result)
        elif result.severity == Severity.WARNING:
            warnings.append(result)
        elif result.severity == Severity.INFO:
            infos.append(result)

    banner = "=" * 80
    print("\n" + banner)
    print("VALIDATION RESULTS")
    print(banner)

    if errors:
        print(f"\n❌ ERRORS ({len(errors)}):")
        for result in errors:
            print(f"  {result.path}: {result.message}")

    if warnings:
        print(f"\n⚠️  WARNINGS ({len(warnings)}):")
        for result in warnings:
            print(f"  {result.path}: {result.message}")

    if infos:
        print(f"\n✓ INFO ({len(infos)} items validated successfully)")

    print("\n" + banner)
    print(f"Summary: {len(errors)} errors, {len(warnings)} warnings, {len(infos)} info")
    print(banner)

    return len(errors), len(warnings)
|
||||
|
||||
|
||||
def main():
    """Command-line entry point.

    Parses arguments, runs the validator, prints results, and exits with
    0 (clean), 1 (errors found), or 2 (warnings only).
    """
    parser = argparse.ArgumentParser(
        description='Validate repository structure against XML or JSON schema',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=__doc__,
    )
    parser.add_argument(
        '--schema',
        default='scripts/definitions/default-repository.xml',
        help='Path to schema file (XML or JSON). Default: scripts/definitions/default-repository.xml',
    )
    parser.add_argument(
        '--format',
        choices=['xml', 'json', 'auto'],
        default='auto',
        help='Schema format (xml, json, or auto-detect). Default: auto',
    )
    parser.add_argument(
        '--repo-path',
        default='.',
        help='Path to repository to validate. Default: current directory',
    )
    args = parser.parse_args()

    # Build and run the validator against the chosen schema/repo pair.
    validator = RepositoryStructureValidator(
        schema_path=args.schema,
        repo_path=args.repo_path,
        schema_format=args.format,
    )
    validator.validate()
    error_count, warning_count = validator.print_results()

    # Exit code communicates the worst severity encountered.
    if error_count > 0:
        sys.exit(1)  # Errors found
    if warning_count > 0:
        sys.exit(2)  # Only warnings
    sys.exit(0)  # Success
|
||||
|
||||
|
||||
# Allow execution as a script while keeping the module importable.
if __name__ == '__main__':
    main()
|
||||
Reference in New Issue
Block a user