#!/usr/bin/env python3
"""
Copyright (C) 2026 Moko Consulting

This file is part of a Moko Consulting project.

SPDX-License-Identifier: GPL-3.0-or-later

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.

FILE INFORMATION
DEFGROUP: MokoStandards.Scripts
INGROUP: MokoStandards.Validation
REPO: https://github.com/mokoconsulting-tech/MokoStandards
PATH: /scripts/validate_file_headers.py
VERSION: 05.00.00
BRIEF: Validate copyright headers and file information in repository files
"""

import argparse
import os
import sys
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple

# Number of leading characters inspected when looking for generated-file
# markers.
GENERATED_SCAN_CHARS = 1000

# Number of leading characters inspected when looking for the copyright
# header and the file information block.
HEADER_SCAN_CHARS = 2000

# File extensions that require headers
HEADER_REQUIRED_EXTENSIONS = {
    '.py': 'python',
    '.php': 'php',
    '.md': 'markdown',
    '.yml': 'yaml',
    '.yaml': 'yaml',
    '.sh': 'shell',
    '.js': 'javascript',
    '.ts': 'typescript',
    '.css': 'css',
}

# Files that are exempt from header requirements
EXEMPT_FILES = {
    'package.json',
    'package-lock.json',
    'composer.json',
    'composer.lock',
    'Gemfile.lock',
    '.gitignore',
    '.gitattributes',
    '.editorconfig',
    'LICENSE',
}

# Path components (third-party code, VCS metadata) that exempt a file
EXEMPT_DIRECTORIES = {
    'vendor',
    'node_modules',
    '.git',
}

# Patterns indicating generated files
GENERATED_PATTERNS = [
    'DO NOT EDIT',
    'AUTO-GENERATED',
    'AUTOGENERATED',
    'Generated by',
]

# Required patterns in header
REQUIRED_HEADER_PATTERNS = [
    'Copyright (C)',
    'Moko Consulting',
    'GPL-3.0-or-later',
]

# Required file information patterns
REQUIRED_FILE_INFO_PATTERNS = [
    'FILE INFORMATION',
    'DEFGROUP:',
    'REPO:',
    'PATH:',
    'VERSION:',
    'BRIEF:',
]

# Required markdown metadata patterns
REQUIRED_MARKDOWN_METADATA = [
    '## Metadata',
    '## Revision History',
]


def _missing_patterns(text: str, patterns: List[str], label: str) -> List[str]:
    """Return one '<label>: <pattern>' message per pattern absent from text."""
    return [f"{label}: {pattern}" for pattern in patterns if pattern not in text]


def is_exempt_file(filepath: Path, root: Optional[Path] = None) -> bool:
    """Check if file is exempt from header requirements.

    A file is exempt when its name is listed in EXEMPT_FILES or when any of
    its path components is listed in EXEMPT_DIRECTORIES.

    Args:
        filepath: Path of the file to classify.
        root: Optional repository root. When given and ``filepath`` lies
            below it, only the components relative to ``root`` are
            inspected, so a repository that itself lives under a directory
            called e.g. ``vendor`` is not exempted wholesale. When omitted,
            every component of ``filepath`` is inspected.

    Returns:
        True if the file must be skipped, False otherwise.
    """
    if filepath.name in EXEMPT_FILES:
        return True

    parts = filepath.parts
    if root is not None:
        try:
            parts = filepath.relative_to(root).parts
        except ValueError:
            # filepath is not located under root: keep the full path.
            pass

    return not EXEMPT_DIRECTORIES.isdisjoint(parts)


def is_generated_file(content: str) -> bool:
    """Check if file appears to be auto-generated.

    Args:
        content: Full text of the file.

    Returns:
        True if any entry of GENERATED_PATTERNS occurs within the first
        GENERATED_SCAN_CHARS characters of ``content``.
    """
    first_lines = content[:GENERATED_SCAN_CHARS]
    return any(pattern in first_lines for pattern in GENERATED_PATTERNS)


def check_copyright_header(content: str, filepath: Path) -> Tuple[bool, List[str]]:
    """Check if file has proper copyright header.

    Args:
        content: Full text of the file.
        filepath: Path of the file (unused; kept for a uniform signature).

    Returns:
        ``(ok, issues)`` where ``issues`` holds one message for every entry
        of REQUIRED_HEADER_PATTERNS missing from the first HEADER_SCAN_CHARS
        characters and ``ok`` is True when there are none.
    """
    issues = _missing_patterns(
        content[:HEADER_SCAN_CHARS],
        REQUIRED_HEADER_PATTERNS,
        "Missing required pattern",
    )
    return not issues, issues


def check_file_information(content: str, filepath: Path) -> Tuple[bool, List[str]]:
    """Check if file has proper file information block.

    Args:
        content: Full text of the file.
        filepath: Path of the file (unused; kept for a uniform signature).

    Returns:
        ``(ok, issues)`` where ``issues`` holds one message for every entry
        of REQUIRED_FILE_INFO_PATTERNS missing from the first
        HEADER_SCAN_CHARS characters and ``ok`` is True when there are none.
    """
    issues = _missing_patterns(
        content[:HEADER_SCAN_CHARS],
        REQUIRED_FILE_INFO_PATTERNS,
        "Missing required file info",
    )
    return not issues, issues


def check_markdown_metadata(content: str, filepath: Path) -> Tuple[bool, List[str]]:
    """Check if markdown file has metadata and revision history.

    Unlike the header checks, the whole document is searched.

    Args:
        content: Full text of the file.
        filepath: Path of the file (unused; kept for a uniform signature).

    Returns:
        ``(ok, issues)`` where ``issues`` holds one message for every entry
        of REQUIRED_MARKDOWN_METADATA missing from ``content`` and ``ok`` is
        True when there are none.
    """
    issues = _missing_patterns(
        content,
        REQUIRED_MARKDOWN_METADATA,
        "Missing required section",
    )
    return not issues, issues


def validate_file(filepath: Path, root: Optional[Path] = None) -> Dict[str, Any]:
    """Validate a single file.

    Args:
        filepath: Path of the file to validate.
        root: Optional repository root, forwarded to is_exempt_file().

    Returns:
        A dict with the keys ``path`` (str), ``valid`` (bool), ``issues``
        (list of str), ``exempt`` (bool) and ``generated`` (bool). Exempt
        and generated files are returned with ``valid`` left at True because
        they are never checked.
    """
    result: Dict[str, Any] = {
        'path': str(filepath),
        'valid': True,
        'issues': [],
        'exempt': False,
        'generated': False,
    }

    # Exempt by name/location, or an extension that does not need a header
    if is_exempt_file(filepath, root) or filepath.suffix not in HEADER_REQUIRED_EXTENSIONS:
        result['exempt'] = True
        return result

    # Read file content; an unreadable or undecodable file is reported as
    # invalid (UnicodeDecodeError is a subclass of ValueError).
    try:
        with open(filepath, 'r', encoding='utf-8') as f:
            content = f.read()
    except (OSError, ValueError) as e:
        result['valid'] = False
        result['issues'].append(f"Error reading file: {e}")
        return result

    # Generated files are not held to the header requirements
    if is_generated_file(content):
        result['generated'] = True
        return result

    checks = [check_copyright_header, check_file_information]
    if filepath.suffix == '.md':
        # Additional checks for markdown files
        checks.append(check_markdown_metadata)

    for check in checks:
        valid, issues = check(content, filepath)
        if not valid:
            result['valid'] = False
            result['issues'].extend(issues)

    return result


def validate_repository(repo_path: Path) -> Dict[str, Any]:
    """Validate all files in repository.

    Args:
        repo_path: Directory that is walked recursively.

    Returns:
        A dict with the counters ``total``, ``validated``, ``valid``,
        ``invalid``, ``exempt`` and ``generated`` plus ``files``, the list of
        per-file results produced by validate_file().
    """
    results: Dict[str, Any] = {
        'total': 0,
        'validated': 0,
        'valid': 0,
        'invalid': 0,
        'exempt': 0,
        'generated': 0,
        'files': [],
    }

    for filepath in repo_path.rglob('*'):
        if not filepath.is_file():
            continue

        results['total'] += 1
        # Pass the root so exemption only looks at in-repository components.
        result = validate_file(filepath, repo_path)
        results['files'].append(result)

        if result['exempt']:
            results['exempt'] += 1
        elif result['generated']:
            results['generated'] += 1
        else:
            results['validated'] += 1
            if result['valid']:
                results['valid'] += 1
            else:
                results['invalid'] += 1

    return results


def print_report(results: Dict[str, Any], verbose: bool = False) -> bool:
    """Print validation report.

    Args:
        results: Summary dict as returned by validate_repository().
        verbose: When True, also list the files that passed validation.

    Returns:
        True when no validated file is invalid, False otherwise.
    """
    print("=" * 70)
    print("FILE HEADER VALIDATION REPORT")
    print("=" * 70)
    print()
    print(f"Total files found: {results['total']}")
    print(f"Files validated: {results['validated']}")
    print(f"Valid headers: {results['valid']}")
    print(f"Invalid headers: {results['invalid']}")
    print(f"Exempt files: {results['exempt']}")
    print(f"Generated files: {results['generated']}")
    print()

    # Exempt and generated files keep valid=True without ever being checked,
    # so both listings below only consider files that were really validated.
    validated_files = [
        file_result
        for file_result in results['files']
        if not file_result['exempt'] and not file_result['generated']
    ]

    if results['invalid'] > 0:
        print("FILES WITH ISSUES:")
        print("-" * 70)
        for file_result in validated_files:
            if not file_result['valid']:
                print(f"\n{file_result['path']}")
                for issue in file_result['issues']:
                    print(f" ✗ {issue}")
        print()

    if verbose and results['valid'] > 0:
        print("\nVALID FILES:")
        print("-" * 70)
        for file_result in validated_files:
            if file_result['valid']:
                print(f" ✓ {file_result['path']}")
        print()

    print("=" * 70)

    if results['invalid'] > 0:
        compliance_rate = (
            results['valid'] / results['validated'] * 100
            if results['validated'] > 0
            else 0
        )
        print(f"Compliance Rate: {compliance_rate:.1f}%")
        print()
        print("ACTION REQUIRED: Fix files with missing or invalid headers")
        return False

    print("✓ All validated files have proper headers")
    return True


def main():
    """Main entry point.

    Parses the command line, validates the requested tree and prints the
    report. Exits with status 1 when the path does not exist, or when
    ``--fail-on-invalid`` is given and invalid headers were found; exits
    with status 0 otherwise.
    """
    parser = argparse.ArgumentParser(
        description='Validate copyright headers and file information'
    )
    parser.add_argument(
        '--path',
        default='.',
        help='Path to repository (default: current directory)'
    )
    parser.add_argument(
        '--verbose',
        action='store_true',
        help='Show all files including valid ones'
    )
    parser.add_argument(
        '--fail-on-invalid',
        action='store_true',
        help='Exit with error code if invalid headers found'
    )

    args = parser.parse_args()

    repo_path = Path(args.path).resolve()

    if not repo_path.exists():
        print(f"Error: Path does not exist: {repo_path}", file=sys.stderr)
        sys.exit(1)

    print(f"Validating files in: {repo_path}")
    print()

    results = validate_repository(repo_path)
    success = print_report(results, args.verbose)

    # Invalid headers only affect the exit status when explicitly requested.
    sys.exit(1 if args.fail_on_invalid and not success else 0)


if __name__ == "__main__":
    main()