Files
MokoCassiopeia/scripts/maintenance/validate_file_headers.py

333 lines
8.8 KiB
Python
Executable File

#!/usr/bin/env python3
"""
Copyright (C) 2026 Moko Consulting <hello@mokoconsulting.tech>
This file is part of a Moko Consulting project.
SPDX-License-Identifier: GPL-3.0-or-later
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
FILE INFORMATION
DEFGROUP: MokoStandards.Scripts
INGROUP: MokoStandards.Validation
REPO: https://github.com/mokoconsulting-tech/MokoStandards
PATH: /scripts/validate_file_headers.py
VERSION: 05.00.00
BRIEF: Validate copyright headers and file information in repository files
"""
import os
import sys
from pathlib import Path
from typing import Any, Dict, List, Tuple
# Extensions of the files that must carry a header, mapped to a language name.
HEADER_REQUIRED_EXTENSIONS = {
    '.py': 'python',
    '.php': 'php',
    '.md': 'markdown',
    '.yml': 'yaml',
    '.yaml': 'yaml',
    '.sh': 'shell',
    '.js': 'javascript',
    '.ts': 'typescript',
    '.css': 'css',
}

# Base names of files that never need a header.
EXEMPT_FILES = {
    'package.json',
    'package-lock.json',
    'composer.json',
    'composer.lock',
    'Gemfile.lock',
    '.gitignore',
    '.gitattributes',
    '.editorconfig',
    'LICENSE',
}

# Marker strings whose presence flags a file as machine-generated.
GENERATED_PATTERNS = [
    'DO NOT EDIT',
    'AUTO-GENERATED',
    'AUTOGENERATED',
    'Generated by',
]

# Substrings every copyright header has to contain.
REQUIRED_HEADER_PATTERNS = [
    'Copyright (C)',
    'Moko Consulting',
    'GPL-3.0-or-later',
]

# Labels every FILE INFORMATION block has to contain.
REQUIRED_FILE_INFO_PATTERNS = [
    'FILE INFORMATION',
    'DEFGROUP:',
    'REPO:',
    'PATH:',
    'VERSION:',
    'BRIEF:',
]

# Section headings every markdown document has to contain.
REQUIRED_MARKDOWN_METADATA = [
    '## Metadata',
    '## Revision History',
]
def is_exempt_file(filepath: Path) -> bool:
    """Report whether *filepath* is excluded from header validation.

    A file is exempt when its base name is listed in ``EXEMPT_FILES`` or when
    any component of its path is ``vendor``, ``node_modules`` or ``.git``.
    """
    skipped_components = ('vendor', 'node_modules', '.git')
    path_components = filepath.parts
    return filepath.name in EXEMPT_FILES or any(
        component in path_components for component in skipped_components
    )
def is_generated_file(content: str) -> bool:
    """Report whether *content* looks machine-generated.

    Only the first 1000 characters are searched for one of the
    ``GENERATED_PATTERNS`` markers; the match is case-sensitive.
    """
    head = content[:1000]
    for marker in GENERATED_PATTERNS:
        if marker in head:
            return True
    return False
def check_copyright_header(content: str, filepath: Path) -> Tuple[bool, List[str]]:
    """Verify that the copyright header is present near the top of a file.

    Args:
        content: Full text of the file; only its first 2000 characters are
            inspected.
        filepath: Path of the file (not used by this check).

    Returns:
        ``(ok, issues)`` where ``issues`` holds one message per entry of
        ``REQUIRED_HEADER_PATTERNS`` that was not found and ``ok`` is ``True``
        when that list is empty.
    """
    header_text = content[:2000]
    problems = [
        f"Missing required pattern: {required}"
        for required in REQUIRED_HEADER_PATTERNS
        if required not in header_text
    ]
    return not problems, problems
def check_file_information(content: str, filepath: Path) -> Tuple[bool, List[str]]:
    """Verify that the FILE INFORMATION block is present near the top of a file.

    Args:
        content: Full text of the file; only its first 2000 characters are
            inspected.
        filepath: Path of the file (not used by this check).

    Returns:
        ``(ok, issues)`` where ``issues`` holds one message per entry of
        ``REQUIRED_FILE_INFO_PATTERNS`` that was not found and ``ok`` is
        ``True`` when that list is empty.
    """
    searched = content[:2000]
    absent_labels = [
        label for label in REQUIRED_FILE_INFO_PATTERNS if label not in searched
    ]
    problems = [f"Missing required file info: {label}" for label in absent_labels]
    return not problems, problems
def check_markdown_metadata(content: str, filepath: Path) -> Tuple[bool, List[str]]:
    """Verify that a markdown document contains its mandatory sections.

    Args:
        content: Full text of the document; unlike the header checks, the
            whole text is searched.
        filepath: Path of the document (not used by this check).

    Returns:
        ``(ok, issues)`` where ``issues`` holds one message per entry of
        ``REQUIRED_MARKDOWN_METADATA`` that was not found and ``ok`` is
        ``True`` when that list is empty.
    """
    problems = [
        f"Missing required section: {heading}"
        for heading in REQUIRED_MARKDOWN_METADATA
        if heading not in content
    ]
    return not problems, problems
def validate_file(filepath: Path) -> Dict[str, Any]:
    """Validate the header of a single file.

    Args:
        filepath: Path of the file to check.

    Returns:
        A dict with the keys:

        * ``path`` -- ``str(filepath)``.
        * ``valid`` -- ``False`` when the file could not be read or a check
          failed; stays ``True`` for exempt and generated files.
        * ``issues`` -- list of human-readable problem descriptions.
        * ``exempt`` -- ``True`` when the file is exempt by name/location or
          its extension is not in ``HEADER_REQUIRED_EXTENSIONS``.
        * ``generated`` -- ``True`` when the file looks machine-generated.
    """
    result: Dict[str, Any] = {
        'path': str(filepath),
        'valid': True,
        'issues': [],
        'exempt': False,
        'generated': False,
    }

    # Skip files that are exempt by name/location or whose type needs no header.
    if is_exempt_file(filepath) or filepath.suffix not in HEADER_REQUIRED_EXTENSIONS:
        result['exempt'] = True
        return result

    try:
        content = filepath.read_text(encoding='utf-8')
    except Exception as e:
        # Deliberately broad: an unreadable or undecodable file is reported as
        # an issue on that file instead of aborting the whole scan.
        result['valid'] = False
        result['issues'].append(f"Error reading file: {e}")
        return result

    # Generated files are reported separately and never checked.
    if is_generated_file(content):
        result['generated'] = True
        return result

    # Every file gets the two header checks; markdown gets one more.
    checks = [check_copyright_header, check_file_information]
    if filepath.suffix == '.md':
        checks.append(check_markdown_metadata)

    for check in checks:
        passed, issues = check(content, filepath)
        if not passed:
            result['valid'] = False
            result['issues'].extend(issues)

    return result
def validate_repository(repo_path: Path) -> Dict[str, Any]:
    """Validate every regular file found below *repo_path*.

    The directory tree is walked recursively; all files on disk are visited,
    not only those tracked by version control.

    Args:
        repo_path: Root directory to scan.

    Returns:
        A dict with the counters ``total`` (files seen), ``validated`` (files
        actually checked), ``valid``, ``invalid``, ``exempt`` and
        ``generated``, plus ``files``, the list of per-file result dicts
        returned by ``validate_file``.
    """
    results: Dict[str, Any] = {
        'total': 0,
        'validated': 0,
        'valid': 0,
        'invalid': 0,
        'exempt': 0,
        'generated': 0,
        'files': [],
    }

    # rglob yields entries in filesystem order; sorting keeps the report
    # reproducible from run to run and from machine to machine.
    for filepath in sorted(repo_path.rglob('*')):
        if not filepath.is_file():
            continue

        results['total'] += 1
        result = validate_file(filepath)
        results['files'].append(result)

        if result['exempt']:
            results['exempt'] += 1
        elif result['generated']:
            results['generated'] += 1
        else:
            # Only files that were really checked count towards valid/invalid.
            results['validated'] += 1
            if result['valid']:
                results['valid'] += 1
            else:
                results['invalid'] += 1

    return results
def print_report(results: Dict[str, Any], verbose: bool = False) -> bool:
    """Print the validation report and tell whether the scan was clean.

    Args:
        results: Summary with the counters ``total``, ``validated``,
            ``valid``, ``invalid``, ``exempt`` and ``generated`` plus
            ``files``, a list of per-file dicts holding ``path``, ``valid``,
            ``issues``, ``exempt`` and ``generated``.
        verbose: When ``True``, also list the files whose headers were
            checked and found valid.

    Returns:
        ``True`` when ``results['invalid']`` is zero, ``False`` otherwise.
    """
    heavy_rule = "=" * 70
    light_rule = "-" * 70

    def was_checked(file_result: Dict[str, Any]) -> bool:
        # Exempt and generated entries keep valid=True although their header
        # was never examined, so they must not be listed as valid or invalid.
        return not file_result['exempt'] and not file_result['generated']

    print(heavy_rule)
    print("FILE HEADER VALIDATION REPORT")
    print(heavy_rule)
    print()
    print(f"Total files found: {results['total']}")
    print(f"Files validated: {results['validated']}")
    print(f"Valid headers: {results['valid']}")
    print(f"Invalid headers: {results['invalid']}")
    print(f"Exempt files: {results['exempt']}")
    print(f"Generated files: {results['generated']}")
    print()

    if results['invalid'] > 0:
        print("FILES WITH ISSUES:")
        print(light_rule)
        for file_result in results['files']:
            if not file_result['valid'] and was_checked(file_result):
                print(f"\n{file_result['path']}")
                for issue in file_result['issues']:
                    print(f"{issue}")
        print()

    if verbose and results['valid'] > 0:
        print("\nVALID FILES:")
        print(light_rule)
        for file_result in results['files']:
            # Restrict the listing to the files counted in results['valid'].
            if file_result['valid'] and was_checked(file_result):
                print(f"{file_result['path']}")
        print()

    print(heavy_rule)
    if results['invalid'] > 0:
        compliance_rate = (
            results['valid'] / results['validated'] * 100
            if results['validated'] > 0
            else 0
        )
        print(f"Compliance Rate: {compliance_rate:.1f}%")
        print()
        print("ACTION REQUIRED: Fix files with missing or invalid headers")
        return False

    print("✓ All validated files have proper headers")
    return True
def main() -> None:
    """Parse the command line, run the validation and exit.

    Exit status is 1 when the given path does not exist or is not a
    directory, or when ``--fail-on-invalid`` is set and invalid headers were
    found; otherwise it is 0.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description='Validate copyright headers and file information'
    )
    parser.add_argument(
        '--path',
        default='.',
        help='Path to repository (default: current directory)'
    )
    parser.add_argument(
        '--verbose',
        action='store_true',
        help='Show all files including valid ones'
    )
    parser.add_argument(
        '--fail-on-invalid',
        action='store_true',
        help='Exit with error code if invalid headers found'
    )
    args = parser.parse_args()

    repo_path = Path(args.path).resolve()
    if not repo_path.exists():
        print(f"Error: Path does not exist: {repo_path}", file=sys.stderr)
        sys.exit(1)
    # A regular file would be scanned as an empty tree and reported as fully
    # compliant, so reject it explicitly.
    if not repo_path.is_dir():
        print(f"Error: Path is not a directory: {repo_path}", file=sys.stderr)
        sys.exit(1)

    print(f"Validating files in: {repo_path}")
    print()

    results = validate_repository(repo_path)
    success = print_report(results, args.verbose)

    # Invalid headers only affect the exit status when the caller asked for it.
    if args.fail_on_invalid and not success:
        sys.exit(1)
    sys.exit(0)


if __name__ == "__main__":
    main()