332 lines
8.8 KiB
Python
Executable File
332 lines
8.8 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""
|
|
Copyright (C) 2026 Moko Consulting <hello@mokoconsulting.tech>
|
|
|
|
This file is part of a Moko Consulting project.
|
|
|
|
SPDX-License-Identifier: GPL-3.0-or-later
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation; either version 3 of the License, or
|
|
(at your option) any later version.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
|
|
FILE INFORMATION
|
|
DEFGROUP: MokoStandards.Scripts
|
|
INGROUP: MokoStandards.Validation
|
|
REPO: https://github.com/mokoconsulting-tech/MokoStandards
|
|
PATH: /scripts/validate_file_headers.py
|
|
VERSION: 05.00.00
|
|
BRIEF: Validate copyright headers and file information in repository files
|
|
"""
|
|
|
|
import sys
from pathlib import Path
from typing import Any, Dict, List, Tuple
|
|
|
|
# Suffix -> language label for every file type that must carry a header.
HEADER_REQUIRED_EXTENSIONS = {
    ".py": "python",
    ".php": "php",
    ".md": "markdown",
    ".yml": "yaml",
    ".yaml": "yaml",
    ".sh": "shell",
    ".js": "javascript",
    ".ts": "typescript",
    ".css": "css",
}

# File names that are never checked for a header.
EXEMPT_FILES = {
    "package.json",
    "package-lock.json",
    "composer.json",
    "composer.lock",
    "Gemfile.lock",
    ".gitignore",
    ".gitattributes",
    ".editorconfig",
    "LICENSE",
}

# Substrings that mark a file as machine-generated.
GENERATED_PATTERNS = [
    "DO NOT EDIT",
    "AUTO-GENERATED",
    "AUTOGENERATED",
    "Generated by",
]

# Substrings every copyright header must contain.
REQUIRED_HEADER_PATTERNS = [
    "Copyright (C)",
    "Moko Consulting",
    "GPL-3.0-or-later",
]

# Substrings every FILE INFORMATION block must contain.
REQUIRED_FILE_INFO_PATTERNS = [
    "FILE INFORMATION",
    "DEFGROUP:",
    "REPO:",
    "PATH:",
    "VERSION:",
    "BRIEF:",
]

# Section headings every markdown document must contain.
REQUIRED_MARKDOWN_METADATA = [
    "## Metadata",
    "## Revision History",
]
|
|
|
|
|
|
def is_exempt_file(filepath: Path) -> bool:
    """Return True when *filepath* does not need a header.

    A file is exempt when its name is listed in ``EXEMPT_FILES`` or when any
    component of its path equals ``vendor``, ``node_modules`` or ``.git``.
    """
    if filepath.name in EXEMPT_FILES:
        return True

    # Third-party trees and git internals are never validated.
    skipped_components = ('vendor', 'node_modules', '.git')
    path_components = filepath.parts
    return any(name in path_components for name in skipped_components)
|
|
|
|
|
|
def is_generated_file(content: str) -> bool:
    """Report whether *content* looks like an auto-generated file.

    Only the first 1000 characters are inspected. The match is a
    case-sensitive substring test against each entry of
    ``GENERATED_PATTERNS``.
    """
    head = content[:1000]
    for marker in GENERATED_PATTERNS:
        if marker in head:
            return True
    return False
|
|
|
|
|
|
def check_copyright_header(content: str, filepath: Path) -> Tuple[bool, List[str]]:
    """Verify that the copyright header markers are present.

    Each entry of ``REQUIRED_HEADER_PATTERNS`` must occur within the first
    2000 characters of *content*. *filepath* is accepted for signature parity
    with the other checks and is not used.

    Returns:
        ``(ok, issues)`` where ``issues`` holds one message per missing
        pattern and ``ok`` is True when that list is empty.
    """
    header_text = content[:2000]
    issues = [
        f"Missing required pattern: {pattern}"
        for pattern in REQUIRED_HEADER_PATTERNS
        if pattern not in header_text
    ]
    return not issues, issues
|
|
|
|
|
|
def check_file_information(content: str, filepath: Path) -> Tuple[bool, List[str]]:
    """Verify that the FILE INFORMATION block markers are present.

    Each entry of ``REQUIRED_FILE_INFO_PATTERNS`` must occur within the first
    2000 characters of *content*. *filepath* is accepted for signature parity
    with the other checks and is not used.

    Returns:
        ``(ok, issues)`` where ``issues`` holds one message per missing
        field and ``ok`` is True when that list is empty.
    """
    header_text = content[:2000]
    issues = [
        f"Missing required file info: {pattern}"
        for pattern in REQUIRED_FILE_INFO_PATTERNS
        if pattern not in header_text
    ]
    return not issues, issues
|
|
|
|
|
|
def check_markdown_metadata(content: str, filepath: Path) -> Tuple[bool, List[str]]:
    """Verify that a markdown document has its mandatory sections.

    Unlike the header checks, the whole of *content* is searched for each
    entry of ``REQUIRED_MARKDOWN_METADATA``. *filepath* is accepted for
    signature parity with the other checks and is not used.

    Returns:
        ``(ok, issues)`` where ``issues`` holds one message per missing
        section and ``ok`` is True when that list is empty.
    """
    issues = [
        f"Missing required section: {heading}"
        for heading in REQUIRED_MARKDOWN_METADATA
        if heading not in content
    ]
    return not issues, issues
|
|
|
|
|
|
def validate_file(filepath: Path) -> Dict[str, Any]:
    """Validate the header of a single file.

    Args:
        filepath: Path of the file to check.

    Returns:
        A dict with the keys:

        * ``path`` -- ``str(filepath)``
        * ``valid`` -- False when the file could not be read or a check failed
        * ``issues`` -- list of human-readable problem descriptions
        * ``exempt`` -- True when the file is exempt or has a suffix that
          does not require a header
        * ``generated`` -- True when the file looks auto-generated

        Exempt and generated files are returned with ``valid`` left at True
        and no checks run.
    """
    result: Dict[str, Any] = {
        'path': str(filepath),
        'valid': True,
        'issues': [],
        'exempt': False,
        'generated': False,
    }

    # Compare suffixes case-insensitively so that files such as README.MD or
    # SCRIPT.PY are validated instead of being silently treated as exempt.
    suffix = filepath.suffix.lower()

    if is_exempt_file(filepath) or suffix not in HEADER_REQUIRED_EXTENSIONS:
        result['exempt'] = True
        return result

    try:
        content = filepath.read_text(encoding='utf-8')
    except (OSError, ValueError) as e:
        # OSError covers missing/unreadable files; ValueError covers
        # UnicodeDecodeError for files that are not valid UTF-8.
        result['valid'] = False
        result['issues'].append(f"Error reading file: {e}")
        return result

    if is_generated_file(content):
        result['generated'] = True
        return result

    # Every file gets the copyright and file-information checks; markdown
    # documents additionally need their metadata sections.
    checks = [check_copyright_header, check_file_information]
    if suffix == '.md':
        checks.append(check_markdown_metadata)

    for check in checks:
        passed, issues = check(content, filepath)
        if not passed:
            result['valid'] = False
            result['issues'].extend(issues)

    return result
|
|
|
|
|
|
def validate_repository(repo_path: Path) -> Dict[str, Any]:
    """Validate every regular file found below *repo_path*.

    Args:
        repo_path: Directory to scan recursively.

    Returns:
        A dict with the integer counters ``total``, ``validated``, ``valid``,
        ``invalid``, ``exempt`` and ``generated``, plus ``files``: the list
        of per-file result dicts produced by ``validate_file``, one per
        regular file encountered.
    """
    results: Dict[str, Any] = {
        'total': 0,
        'validated': 0,
        'valid': 0,
        'invalid': 0,
        'exempt': 0,
        'generated': 0,
        'files': [],
    }

    # rglob() walks the filesystem (it knows nothing about git tracking) and
    # does not guarantee an order; sort so the report is reproducible.
    for filepath in sorted(repo_path.rglob('*')):
        if not filepath.is_file():
            continue

        results['total'] += 1

        result = validate_file(filepath)
        results['files'].append(result)

        # Each file lands in exactly one bucket: exempt, generated, or
        # validated (the latter split into valid / invalid).
        if result['exempt']:
            results['exempt'] += 1
        elif result['generated']:
            results['generated'] += 1
        else:
            results['validated'] += 1
            if result['valid']:
                results['valid'] += 1
            else:
                results['invalid'] += 1

    return results
|
|
|
|
|
|
def print_report(results: Dict[str, Any], verbose: bool = False) -> bool:
    """Print the validation report to standard output.

    Args:
        results: Summary dict with the counters ``total``, ``validated``,
            ``valid``, ``invalid``, ``exempt``, ``generated`` and a ``files``
            list of per-file dicts (keys ``path``, ``valid``, ``issues``,
            ``exempt``, ``generated``).
        verbose: When True, also list the files that passed validation.

    Returns:
        True when ``results['invalid']`` is zero, False otherwise.
    """
    print("=" * 70)
    print("FILE HEADER VALIDATION REPORT")
    print("=" * 70)
    print()
    print(f"Total files found: {results['total']}")
    print(f"Files validated: {results['validated']}")
    print(f"Valid headers: {results['valid']}")
    print(f"Invalid headers: {results['invalid']}")
    print(f"Exempt files: {results['exempt']}")
    print(f"Generated files: {results['generated']}")
    print()

    # Exempt and generated entries keep valid=True without ever being
    # checked, so only entries that were actually validated belong in
    # either listing below.
    checked_files = [
        file_result for file_result in results['files']
        if not file_result['exempt'] and not file_result['generated']
    ]

    if results['invalid'] > 0:
        print("FILES WITH ISSUES:")
        print("-" * 70)
        for file_result in checked_files:
            if not file_result['valid']:
                print(f"\n{file_result['path']}")
                for issue in file_result['issues']:
                    print(f" ✗ {issue}")
        print()

    if verbose and results['valid'] > 0:
        print("\nVALID FILES:")
        print("-" * 70)
        for file_result in checked_files:
            if file_result['valid']:
                print(f" ✓ {file_result['path']}")
        print()

    print("=" * 70)

    if results['invalid'] > 0:
        # Guard against division by zero for hand-built result dicts.
        validated = results['validated']
        compliance_rate = (results['valid'] / validated * 100) if validated > 0 else 0
        print(f"Compliance Rate: {compliance_rate:.1f}%")
        print()
        print("ACTION REQUIRED: Fix files with missing or invalid headers")
        return False

    print("✓ All validated files have proper headers")
    return True
|
|
|
|
|
|
def main() -> None:
    """Command-line entry point.

    Parses ``--path``, ``--verbose`` and ``--fail-on-invalid``, validates
    every file below the given path, prints the report and exits.

    Exit status:
        1 when the path does not exist or is not a directory, or when
        ``--fail-on-invalid`` is given and invalid headers were found;
        0 otherwise.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description='Validate copyright headers and file information'
    )
    parser.add_argument(
        '--path',
        default='.',
        help='Path to repository (default: current directory)'
    )
    parser.add_argument(
        '--verbose',
        action='store_true',
        help='Show all files including valid ones'
    )
    parser.add_argument(
        '--fail-on-invalid',
        action='store_true',
        help='Exit with error code if invalid headers found'
    )

    args = parser.parse_args()

    repo_path = Path(args.path).resolve()

    if not repo_path.exists():
        print(f"Error: Path does not exist: {repo_path}", file=sys.stderr)
        sys.exit(1)

    # A regular file would scan nothing and falsely report success.
    if not repo_path.is_dir():
        print(f"Error: Path is not a directory: {repo_path}", file=sys.stderr)
        sys.exit(1)

    print(f"Validating files in: {repo_path}")
    print()

    results = validate_repository(repo_path)
    success = print_report(results, args.verbose)

    # Without --fail-on-invalid the script is report-only and always exits 0,
    # even when invalid headers were found.
    sys.exit(1 if args.fail_on_invalid and not success else 0)
|
|
|
|
|
|
# Run the validator only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|