259 lines
8.0 KiB
Python
Executable File
259 lines
8.0 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""
|
|
Copyright (C) 2026 Moko Consulting <hello@mokoconsulting.tech>
|
|
|
|
This file is part of a Moko Consulting project.
|
|
|
|
SPDX-License-Identifier: GPL-3.0-or-later
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation; either version 3 of the License, or
|
|
(at your option) any later version.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
|
|
FILE INFORMATION
|
|
DEFGROUP: MokoStandards.Scripts.Validate
|
|
INGROUP: MokoStandards
|
|
REPO: https://github.com/mokoconsulting-tech/MokoStandards
|
|
PATH: /scripts/validate/validate_codeql_config.py
|
|
VERSION: 01.00.00
|
|
BRIEF: Validates CodeQL workflow language configuration matches repository contents
|
|
"""
|
|
|
|
import argparse
|
|
import sys
|
|
from pathlib import Path
|
|
from typing import Dict, List, Set, Tuple
|
|
|
|
try:
|
|
import yaml
|
|
except ImportError:
|
|
print("Error: PyYAML is required. Install with: pip install pyyaml", file=sys.stderr)
|
|
sys.exit(1)
|
|
|
|
|
|
# Language to file extension mapping
|
|
LANGUAGE_EXTENSIONS = {
|
|
'python': {'.py'},
|
|
'javascript': {'.js', '.jsx', '.ts', '.tsx', '.mjs', '.cjs'},
|
|
'php': {'.php'},
|
|
'java': {'.java'},
|
|
'go': {'.go'},
|
|
'ruby': {'.rb'},
|
|
'cpp': {'.cpp', '.cc', '.cxx', '.c', '.h', '.hpp'},
|
|
'csharp': {'.cs'},
|
|
}
|
|
|
|
|
|
def detect_languages_in_repo(repo_path: Path, exclude_dirs: Set[str] = None) -> Dict[str, int]:
|
|
"""
|
|
Detect programming languages present in the repository by scanning file extensions.
|
|
|
|
Args:
|
|
repo_path: Path to the repository root
|
|
exclude_dirs: Set of directory names to exclude from scanning
|
|
|
|
Returns:
|
|
Dictionary mapping language names to file counts
|
|
"""
|
|
if exclude_dirs is None:
|
|
exclude_dirs = {'.git', 'vendor', 'node_modules', '.venv', 'venv', '__pycache__'}
|
|
|
|
language_counts = {}
|
|
|
|
for language, extensions in LANGUAGE_EXTENSIONS.items():
|
|
count = 0
|
|
for ext in extensions:
|
|
for file_path in repo_path.rglob(f'*{ext}'):
|
|
# Skip excluded directories
|
|
if any(excluded in file_path.parts for excluded in exclude_dirs):
|
|
continue
|
|
if file_path.is_file():
|
|
count += 1
|
|
|
|
if count > 0:
|
|
language_counts[language] = count
|
|
|
|
return language_counts
|
|
|
|
|
|
def parse_codeql_workflow(workflow_path: Path) -> Tuple[List[str], bool]:
|
|
"""
|
|
Parse CodeQL workflow file and extract configured languages.
|
|
|
|
Args:
|
|
workflow_path: Path to the CodeQL workflow YAML file
|
|
|
|
Returns:
|
|
Tuple of (list of configured languages, whether parsing succeeded)
|
|
"""
|
|
try:
|
|
with open(workflow_path, 'r') as f:
|
|
workflow = yaml.safe_load(f)
|
|
|
|
# Navigate to the matrix.language configuration
|
|
jobs = workflow.get('jobs', {})
|
|
for job_name, job_config in jobs.items():
|
|
strategy = job_config.get('strategy', {})
|
|
matrix = strategy.get('matrix', {})
|
|
languages = matrix.get('language', [])
|
|
|
|
if languages:
|
|
return languages, True
|
|
|
|
return [], False
|
|
except Exception as e:
|
|
print(f"Error parsing workflow: {e}", file=sys.stderr)
|
|
return [], False
|
|
|
|
|
|
def validate_codeql_config(repo_path: Path, workflow_path: Path) -> Tuple[bool, List[str], List[str]]:
|
|
"""
|
|
Validate that CodeQL workflow languages match repository contents.
|
|
|
|
Args:
|
|
repo_path: Path to the repository root
|
|
workflow_path: Path to the CodeQL workflow file
|
|
|
|
Returns:
|
|
Tuple of (is_valid, list of errors, list of warnings)
|
|
"""
|
|
errors = []
|
|
warnings = []
|
|
|
|
# Check if workflow file exists
|
|
if not workflow_path.exists():
|
|
errors.append(f"CodeQL workflow not found at: {workflow_path}")
|
|
return False, errors, warnings
|
|
|
|
# Detect languages in repository
|
|
detected_languages = detect_languages_in_repo(repo_path)
|
|
|
|
if not detected_languages:
|
|
warnings.append("No supported programming languages detected in repository")
|
|
return True, errors, warnings
|
|
|
|
# Parse CodeQL workflow configuration
|
|
configured_languages, parse_success = parse_codeql_workflow(workflow_path)
|
|
|
|
if not parse_success:
|
|
errors.append("Could not find language configuration in CodeQL workflow")
|
|
return False, errors, warnings
|
|
|
|
if not configured_languages:
|
|
errors.append("No languages configured in CodeQL workflow matrix")
|
|
return False, errors, warnings
|
|
|
|
# Compare detected vs configured languages
|
|
detected_set = set(detected_languages.keys())
|
|
configured_set = set(configured_languages)
|
|
|
|
# Languages configured but not present in repo
|
|
extra_languages = configured_set - detected_set
|
|
if extra_languages:
|
|
for lang in extra_languages:
|
|
errors.append(
|
|
f"Language '{lang}' is configured in CodeQL but no {lang.upper()} files found in repository. "
|
|
f"This will cause CodeQL analysis to fail."
|
|
)
|
|
|
|
# Languages present but not configured
|
|
missing_languages = detected_set - configured_set
|
|
if missing_languages:
|
|
for lang in missing_languages:
|
|
file_count = detected_languages[lang]
|
|
warnings.append(
|
|
f"Language '{lang}' has {file_count} files in repository but is not configured in CodeQL workflow. "
|
|
f"Consider adding it for security scanning."
|
|
)
|
|
|
|
is_valid = len(errors) == 0
|
|
return is_valid, errors, warnings
|
|
|
|
|
|
def main():
|
|
"""Main entry point for the validation script."""
|
|
parser = argparse.ArgumentParser(
|
|
description='Validate CodeQL workflow language configuration against repository contents'
|
|
)
|
|
parser.add_argument(
|
|
'--repo-path',
|
|
type=Path,
|
|
default=Path('.'),
|
|
help='Path to repository root (default: current directory)'
|
|
)
|
|
parser.add_argument(
|
|
'--workflow-path',
|
|
type=Path,
|
|
help='Path to CodeQL workflow file (default: .github/workflows/codeql-analysis.yml)'
|
|
)
|
|
parser.add_argument(
|
|
'--strict',
|
|
action='store_true',
|
|
help='Treat warnings as errors'
|
|
)
|
|
|
|
args = parser.parse_args()
|
|
|
|
repo_path = args.repo_path.resolve()
|
|
workflow_path = args.workflow_path
|
|
|
|
if workflow_path is None:
|
|
workflow_path = repo_path / '.github' / 'workflows' / 'codeql-analysis.yml'
|
|
else:
|
|
workflow_path = workflow_path.resolve()
|
|
|
|
print(f"Validating CodeQL configuration...")
|
|
print(f"Repository: {repo_path}")
|
|
print(f"Workflow: {workflow_path}")
|
|
print()
|
|
|
|
# Detect languages first for informational purposes
|
|
detected_languages = detect_languages_in_repo(repo_path)
|
|
if detected_languages:
|
|
print("Detected languages in repository:")
|
|
for lang, count in sorted(detected_languages.items()):
|
|
print(f" - {lang}: {count} files")
|
|
print()
|
|
|
|
# Validate configuration
|
|
is_valid, errors, warnings = validate_codeql_config(repo_path, workflow_path)
|
|
|
|
# Print results
|
|
if errors:
|
|
print("❌ ERRORS:")
|
|
for error in errors:
|
|
print(f" - {error}")
|
|
print()
|
|
|
|
if warnings:
|
|
print("⚠️ WARNINGS:")
|
|
for warning in warnings:
|
|
print(f" - {warning}")
|
|
print()
|
|
|
|
if is_valid and not warnings:
|
|
print("✅ CodeQL configuration is valid and matches repository contents")
|
|
return 0
|
|
elif is_valid:
|
|
print("✅ CodeQL configuration is valid (with warnings)")
|
|
if args.strict:
|
|
print("❌ Strict mode enabled: treating warnings as errors")
|
|
return 1
|
|
return 0
|
|
else:
|
|
print("❌ CodeQL configuration validation failed")
|
|
return 1
|
|
|
|
|
|
if __name__ == '__main__':
|
|
sys.exit(main())
|