chore: sync workflows, scripts, and configurations from MokoStandards

This commit is contained in:
Moko Standards Bot
2026-01-30 02:15:07 +00:00
parent 2436054ae6
commit 0f2c0c1166
16 changed files with 5064 additions and 28 deletions

View File

@@ -0,0 +1,451 @@
#!/usr/bin/env python3
"""
Copyright (C) 2025 Moko Consulting <hello@mokoconsulting.tech>
This file is part of a Moko Consulting project.
SPDX-License-Identifier: GPL-3.0-or-later
This program is free software; you can redistribute it and/or modify it under the terms
of the GNU General Public License as published by the Free Software Foundation; either
version 3 of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License (./LICENSE).
# FILE INFORMATION
DEFGROUP: MokoStandards
INGROUP: MokoStandards.Scripts
REPO: https://github.com/mokoconsulting-tech/MokoStandards/
VERSION: 05.00.00
PATH: ./scripts/release_version.py
BRIEF: Script to release a version by moving UNRELEASED items to versioned section
NOTE: Updates CHANGELOG.md and optionally updates VERSION in files
"""
import argparse
import json
import re
import subprocess
import sys
from datetime import datetime
from pathlib import Path
from typing import List, Optional, Tuple
class VersionReleaser:
    """Manages version releases in CHANGELOG.md and updates VERSION in files."""

    UNRELEASED_PATTERN = r"## \[Unreleased\]"  # Standard Keep a Changelog format
    VERSION_PATTERN = r"## \[(\d+\.\d+\.\d+)\]"
    VERSION_HEADER_PATTERN = r"VERSION:\s*(\d+\.\d+\.\d+)"
    CHANGELOG_H1_PATTERN = r"^# CHANGELOG - .+ \(VERSION: (\d+\.\d+\.\d+)\)"  # H1 format

    def __init__(self, changelog_path: Path, repo_root: Path):
        """
        Initialize the version releaser.

        Args:
            changelog_path: Path to CHANGELOG.md file
            repo_root: Path to repository root
        """
        self.changelog_path = changelog_path
        self.repo_root = repo_root
        self.lines: List[str] = []

    def read_changelog(self) -> bool:
        """Read the changelog into self.lines. Returns True on success."""
        try:
            with open(self.changelog_path, "r", encoding="utf-8") as f:
                self.lines = f.readlines()
            return True
        except FileNotFoundError:
            print(f"Error: CHANGELOG.md not found at {self.changelog_path}", file=sys.stderr)
            return False
        except Exception as e:
            print(f"Error reading CHANGELOG.md: {e}", file=sys.stderr)
            return False

    def write_changelog(self) -> bool:
        """Write the updated changelog back to file. Returns True on success."""
        try:
            with open(self.changelog_path, "w", encoding="utf-8") as f:
                f.writelines(self.lines)
            return True
        except Exception as e:
            print(f"Error writing CHANGELOG.md: {e}", file=sys.stderr)
            return False

    def find_unreleased_section(self) -> Optional[int]:
        """Return the line index of the '## [Unreleased]' heading, or None."""
        for i, line in enumerate(self.lines):
            if re.match(self.UNRELEASED_PATTERN, line):
                return i
        return None

    def find_next_version_section(self, start_index: int) -> Optional[int]:
        """Return the index of the first version heading after start_index, or None."""
        for i in range(start_index + 1, len(self.lines)):
            if re.match(self.VERSION_PATTERN, self.lines[i]):
                return i
        return None

    def has_unreleased_content(self, unreleased_index: int, next_version_index: Optional[int]) -> bool:
        """Check if the UNRELEASED section has any non-blank, non-heading content."""
        # BUGFIX: compare against None explicitly; the original truthiness test
        # would treat a (theoretical) index of 0 as "no next section".
        end_index = next_version_index if next_version_index is not None else len(self.lines)
        for i in range(unreleased_index + 1, end_index):
            line = self.lines[i].strip()
            # Skip empty lines and headers ("###" category headings start with "##" too)
            if line and not line.startswith("##"):
                return True
        return False

    def validate_version(self, version: str) -> bool:
        """Validate version format (XX.YY.ZZ, exactly two digits per component)."""
        pattern = r"^\d{2}\.\d{2}\.\d{2}$"
        return bool(re.match(pattern, version))

    def release_version(self, version: str, date: Optional[str] = None) -> bool:
        """
        Move UNRELEASED content to a new version section.

        Args:
            version: Version number (XX.YY.ZZ format)
            date: Release date (YYYY-MM-DD format), defaults to today

        Returns:
            True if successful, False otherwise
        """
        if not self.validate_version(version):
            print(f"Error: Invalid version format '{version}'. Must be XX.YY.ZZ (e.g., 05.01.00)",
                  file=sys.stderr)
            return False
        if date is None:
            date = datetime.now().strftime("%Y-%m-%d")
        unreleased_index = self.find_unreleased_section()
        if unreleased_index is None:
            print("Error: UNRELEASED section not found in CHANGELOG.md", file=sys.stderr)
            return False
        next_version_index = self.find_next_version_section(unreleased_index)
        # Refuse to cut an empty release
        if not self.has_unreleased_content(unreleased_index, next_version_index):
            print("Warning: UNRELEASED section is empty. Nothing to release.", file=sys.stderr)
            return False
        # BUGFIX + dedup: compute the section end once with an explicit None
        # check (the original repeated `if next_version_index:` twice, which is
        # both duplicated and falsy-0 unsafe), then slice and delete in place.
        end_index = next_version_index if next_version_index is not None else len(self.lines)
        unreleased_content = self.lines[unreleased_index + 1:end_index]
        del self.lines[unreleased_index + 1:end_index]
        # Build the new version section and insert it right after UNRELEASED
        new_version_lines = [
            "\n",
            f"## [{version}] - {date}\n"
        ]
        new_version_lines.extend(unreleased_content)
        insert_index = unreleased_index + 1
        for line in reversed(new_version_lines):
            self.lines.insert(insert_index, line)
        # Keep the H1 header version in sync with the release
        self.update_changelog_h1_version(version)
        return True

    def update_changelog_h1_version(self, version: str) -> bool:
        """
        Update the version in the H1 header of CHANGELOG.

        Format: # CHANGELOG - RepoName (VERSION: X.Y.Z)

        Args:
            version: New version number

        Returns:
            True if updated, False otherwise
        """
        for i, line in enumerate(self.lines):
            if re.match(self.CHANGELOG_H1_PATTERN, line):
                # Extract the repo name so only the version part changes
                match = re.match(r"^# CHANGELOG - (.+) \(VERSION: \d+\.\d+\.\d+\)", line)
                if match:
                    repo_name = match.group(1)
                    self.lines[i] = f"# CHANGELOG - {repo_name} (VERSION: {version})\n"
                    return True
        return False

    def update_file_versions(self, version: str, dry_run: bool = False) -> List[Path]:
        """
        Update VERSION in all files in the repository.

        Args:
            version: New version number
            dry_run: If True, don't actually update files

        Returns:
            List of files (relative to repo root) that were (or would be) updated
        """
        updated_files = []
        # Find all markdown, Python, and text files
        patterns = ["**/*.md", "**/*.py", "**/*.txt", "**/*.yml", "**/*.yaml"]
        files_to_check = []
        for pattern in patterns:
            files_to_check.extend(self.repo_root.glob(pattern))
        # Hoisted out of the loop (loop-invariant): directories never touched
        skip_dirs = [".git", "node_modules", "vendor", "__pycache__", ".venv"]
        for file_path in files_to_check:
            if any(skip_dir in file_path.parts for skip_dir in skip_dirs):
                continue
            try:
                with open(file_path, "r", encoding="utf-8") as f:
                    content = f.read()
                # Only rewrite files that carry a VERSION header
                if re.search(self.VERSION_HEADER_PATTERN, content):
                    new_content = re.sub(
                        self.VERSION_HEADER_PATTERN,
                        f"VERSION: {version}",
                        content
                    )
                    if new_content != content:
                        if not dry_run:
                            with open(file_path, "w", encoding="utf-8") as f:
                                f.write(new_content)
                        updated_files.append(file_path.relative_to(self.repo_root))
            except (UnicodeDecodeError, PermissionError):
                # Skip binary files or files we can't read
                continue
            except Exception as e:
                print(f"Warning: Error processing {file_path}: {e}", file=sys.stderr)
                continue
        return updated_files

    def extract_release_notes(self, version: str) -> Optional[str]:
        """
        Extract release notes for a specific version from CHANGELOG.

        Args:
            version: Version number to extract notes for

        Returns:
            Release notes content or None if not found
        """
        version_pattern = rf"## \[{re.escape(version)}\]"
        notes_lines = []
        in_version = False
        for line in self.lines:
            if re.match(version_pattern, line):
                in_version = True
                continue
            elif in_version:
                # Stop at next version heading
                if line.startswith("## ["):
                    break
                notes_lines.append(line)
        if notes_lines:
            return "".join(notes_lines).strip()
        return None

    def create_github_release(self, version: str, dry_run: bool = False) -> bool:
        """
        Create a GitHub release using gh CLI.

        Args:
            version: Version number
            dry_run: If True, don't actually create release

        Returns:
            True if successful, False otherwise
        """
        # Check if gh CLI is available
        try:
            subprocess.run(["gh", "--version"], capture_output=True, check=True)
        except (subprocess.CalledProcessError, FileNotFoundError):
            print("Warning: gh CLI not found. Skipping GitHub release creation.", file=sys.stderr)
            print("Install gh CLI: https://cli.github.com/", file=sys.stderr)
            return False
        # Extract release notes from changelog; fall back to a stub
        release_notes = self.extract_release_notes(version)
        if not release_notes:
            print(f"Warning: Could not extract release notes for version {version}", file=sys.stderr)
            release_notes = f"Release {version}"
        tag_name = f"v{version}"
        title = f"Release {version}"
        if dry_run:
            # (was an f-string with no placeholders)
            print("\n[DRY RUN] Would create GitHub release:")
            print(f"  Tag: {tag_name}")
            print(f"  Title: {title}")
            print(f"  Notes:\n{release_notes[:200]}...")
            return True
        try:
            # Create the release; gh prints the release URL on stdout
            cmd = [
                "gh", "release", "create", tag_name,
                "--title", title,
                "--notes", release_notes
            ]
            result = subprocess.run(cmd, capture_output=True, text=True, check=True)
            print(f"\nSuccessfully created GitHub release: {tag_name}")
            print(f"Release URL: {result.stdout.strip()}")
            return True
        except subprocess.CalledProcessError as e:
            print(f"Error creating GitHub release: {e.stderr}", file=sys.stderr)
            return False
def main() -> int:
    """Main entry point for the version release script.

    Parses CLI options, locates the repository root, performs the release in
    CHANGELOG.md, and optionally updates VERSION headers and cuts a GitHub
    release.

    Returns:
        Process exit code: 0 on success, 1 on failure.
    """
    arg_parser = argparse.ArgumentParser(
        description="Release a version by moving UNRELEASED items to versioned section",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
# Release version 05.01.00 with today's date
%(prog)s --version 05.01.00
# Release version with specific date
%(prog)s --version 05.01.00 --date 2026-01-15
# Release and update VERSION in all files
%(prog)s --version 05.01.00 --update-files
# Release, update files, and create GitHub release
%(prog)s --version 05.01.00 --update-files --create-release
# Dry run to see what would be updated
%(prog)s --version 05.01.00 --update-files --create-release --dry-run
Version format: XX.YY.ZZ (e.g., 05.01.00)
"""
    )
    arg_parser.add_argument(
        "--version", type=str, required=True,
        help="Version number in XX.YY.ZZ format (e.g., 05.01.00)")
    arg_parser.add_argument(
        "--date", type=str,
        help="Release date in YYYY-MM-DD format (defaults to today)")
    arg_parser.add_argument(
        "--changelog", type=Path, default=Path("CHANGELOG.md"),
        help="Path to CHANGELOG.md file (default: ./CHANGELOG.md)")
    arg_parser.add_argument(
        "--update-files", action="store_true",
        help="Update VERSION header in all repository files")
    arg_parser.add_argument(
        "--create-release", action="store_true",
        help="Create a GitHub release using gh CLI")
    arg_parser.add_argument(
        "--dry-run", action="store_true",
        help="Show what would be done without making changes")
    opts = arg_parser.parse_args()

    # Walk upward from the CWD until a .git directory marks the repo root;
    # fall back to the CWD if no .git is found anywhere up the tree.
    start_dir = Path.cwd()
    repo_root = start_dir
    while repo_root.parent != repo_root:
        if (repo_root / ".git").exists():
            break
        repo_root = repo_root.parent
    else:
        repo_root = start_dir

    # Relative changelog paths are anchored at the repo root
    changelog_path = opts.changelog if opts.changelog.is_absolute() else repo_root / opts.changelog

    releaser = VersionReleaser(changelog_path, repo_root)
    if not releaser.read_changelog():
        return 1

    # Perform the release (or just announce it in dry-run mode)
    if opts.dry_run:
        print(f"[DRY RUN] Would release version {opts.version}")
    else:
        if not releaser.release_version(opts.version, opts.date):
            return 1
        if not releaser.write_changelog():
            return 1
        print(f"Successfully released version {opts.version} in CHANGELOG.md")

    # Optionally propagate the version into file headers
    if opts.update_files:
        touched = releaser.update_file_versions(opts.version, opts.dry_run)
        if touched:
            if opts.dry_run:
                print(f"\n[DRY RUN] Would update VERSION in {len(touched)} files:")
            else:
                print(f"\nUpdated VERSION to {opts.version} in {len(touched)} files:")
            for rel_path in sorted(touched):
                print(f"  - {rel_path}")
        else:
            print("\nNo files with VERSION headers found to update.")

    # Optionally create a GitHub release (best-effort; failure is non-fatal)
    if opts.create_release:
        if not releaser.create_github_release(opts.version, opts.dry_run):
            print("\nNote: GitHub release creation failed or was skipped.", file=sys.stderr)
    return 0
# Script entry point: propagate main()'s return value as the exit code.
if __name__ == "__main__":
    sys.exit(main())

View File

@@ -0,0 +1,319 @@
#!/usr/bin/env python3
"""
Copyright (C) 2025 Moko Consulting <hello@mokoconsulting.tech>
This file is part of a Moko Consulting project.
SPDX-License-Identifier: GPL-3.0-or-later
This program is free software; you can redistribute it and/or modify it under the terms
of the GNU General Public License as published by the Free Software Foundation; either
version 3 of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License (./LICENSE).
# FILE INFORMATION
DEFGROUP: MokoStandards
INGROUP: MokoStandards.Scripts
REPO: https://github.com/mokoconsulting-tech/MokoStandards/
VERSION: 05.00.00
PATH: ./scripts/update_changelog.py
BRIEF: Script to update CHANGELOG.md with entries to UNRELEASED section
NOTE: Follows Keep a Changelog format, supports Added/Changed/Deprecated/Removed/Fixed/Security
"""
import argparse
import os
import re
import sys
from datetime import datetime
from pathlib import Path
from typing import List, Optional
class ChangelogUpdater:
    """Updates CHANGELOG.md following Keep a Changelog format."""

    VALID_CATEGORIES = ["Added", "Changed", "Deprecated", "Removed", "Fixed", "Security"]
    UNRELEASED_PATTERN = r"## \[Unreleased\]"  # Standard Keep a Changelog format

    def __init__(self, changelog_path: Path):
        """
        Initialize the changelog updater.

        Args:
            changelog_path: Path to CHANGELOG.md file
        """
        self.changelog_path = changelog_path
        self.lines: List[str] = []

    def read_changelog(self) -> bool:
        """
        Read the changelog file.

        Returns:
            True if successful, False otherwise
        """
        try:
            with open(self.changelog_path, "r", encoding="utf-8") as f:
                self.lines = f.readlines()
            return True
        except FileNotFoundError:
            print(f"Error: CHANGELOG.md not found at {self.changelog_path}", file=sys.stderr)
            return False
        except Exception as e:
            print(f"Error reading CHANGELOG.md: {e}", file=sys.stderr)
            return False

    def find_unreleased_section(self) -> Optional[int]:
        """
        Find the UNRELEASED section in the changelog.

        Returns:
            Line index of UNRELEASED section, or None if not found
        """
        for i, line in enumerate(self.lines):
            if re.match(self.UNRELEASED_PATTERN, line):
                return i
        return None

    def find_next_version_section(self, start_index: int) -> Optional[int]:
        """
        Find the next version section after UNRELEASED.

        Args:
            start_index: Index to start searching from

        Returns:
            Line index of next version section, or None if not found
        """
        version_pattern = r"## \[\d+\.\d+\.\d+\]"
        for i in range(start_index + 1, len(self.lines)):
            if re.match(version_pattern, self.lines[i]):
                return i
        return None

    def get_category_index(self, unreleased_index: int, next_version_index: Optional[int],
                           category: str) -> Optional[int]:
        """
        Find the index of a specific category within UNRELEASED section.

        Args:
            unreleased_index: Index of UNRELEASED heading
            next_version_index: Index of next version section (or None)
            category: Category name (e.g., "Added", "Changed")

        Returns:
            Line index of category heading, or None if not found
        """
        # BUGFIX: explicit None check; the original truthiness test would
        # treat a (theoretical) index of 0 as "no next section".
        end_index = next_version_index if next_version_index is not None else len(self.lines)
        category_pattern = rf"### {category}"
        for i in range(unreleased_index + 1, end_index):
            if re.match(category_pattern, self.lines[i]):
                return i
        return None

    def add_entry(self, category: str, entry: str, subcategory: Optional[str] = None) -> bool:
        """
        Add an entry to the UNRELEASED section.

        Args:
            category: Category (Added/Changed/Deprecated/Removed/Fixed/Security)
            entry: Entry text to add
            subcategory: Optional subcategory/subheading

        Returns:
            True if successful, False otherwise
        """
        if category not in self.VALID_CATEGORIES:
            print(f"Error: Invalid category '{category}'. Must be one of: {', '.join(self.VALID_CATEGORIES)}",
                  file=sys.stderr)
            return False
        unreleased_index = self.find_unreleased_section()
        if unreleased_index is None:
            print("Error: UNRELEASED section not found in CHANGELOG.md", file=sys.stderr)
            return False
        next_version_index = self.find_next_version_section(unreleased_index)
        category_index = self.get_category_index(unreleased_index, next_version_index, category)
        # Format entry with proper indentation
        if subcategory:
            formatted_entry = f"  - **{subcategory}**: {entry}\n"
        else:
            formatted_entry = f"- {entry}\n"
        if category_index is None:
            # Category doesn't exist yet: create it directly under UNRELEASED
            insert_index = unreleased_index + 1
            # Skip any blank lines after the UNRELEASED heading
            while insert_index < len(self.lines) and self.lines[insert_index].strip() == "":
                insert_index += 1
            # Insert category heading, the entry, and a trailing blank line
            self.lines.insert(insert_index, f"### {category}\n")
            self.lines.insert(insert_index + 1, formatted_entry)
            self.lines.insert(insert_index + 2, "\n")
        else:
            # Category exists: append the entry at the end of that category
            insert_index = category_index + 1
            while insert_index < len(self.lines):
                line = self.lines[insert_index]
                # Stop if we hit another category or version section
                if line.startswith("###") or line.startswith("##"):
                    break
                # Stop at a blank line that directly precedes the next section
                if line.strip() == "" and insert_index + 1 < len(self.lines):
                    next_line = self.lines[insert_index + 1]
                    if next_line.startswith("###") or next_line.startswith("##"):
                        break
                insert_index += 1
            # Back up over trailing blank lines so the entry joins the list
            while insert_index > category_index + 1 and self.lines[insert_index - 1].strip() == "":
                insert_index -= 1
            self.lines.insert(insert_index, formatted_entry)
        return True

    def write_changelog(self) -> bool:
        """
        Write the updated changelog back to file.

        Returns:
            True if successful, False otherwise
        """
        try:
            with open(self.changelog_path, "w", encoding="utf-8") as f:
                f.writelines(self.lines)
            return True
        except Exception as e:
            print(f"Error writing CHANGELOG.md: {e}", file=sys.stderr)
            return False

    def display_unreleased(self) -> None:
        """Display the current UNRELEASED section."""
        unreleased_index = self.find_unreleased_section()
        if unreleased_index is None:
            print("UNRELEASED section not found")
            return
        next_version_index = self.find_next_version_section(unreleased_index)
        # BUGFIX: explicit None check (same falsy-0 hazard as above)
        end_index = next_version_index if next_version_index is not None else len(self.lines)
        print("Current UNRELEASED section:")
        print("=" * 60)
        for i in range(unreleased_index, end_index):
            print(self.lines[i], end="")
        print("=" * 60)
def main() -> int:
    """
    Main entry point for the changelog updater script.

    Returns:
        Exit code (0 for success, non-zero for error)
    """
    arg_parser = argparse.ArgumentParser(
        description="Update CHANGELOG.md with entries to UNRELEASED section",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
# Add a simple entry
%(prog)s --category Added --entry "New feature X"
# Add an entry with subcategory
%(prog)s --category Changed --entry "Updated API endpoints" --subcategory "API"
# Display current UNRELEASED section
%(prog)s --show
Categories: Added, Changed, Deprecated, Removed, Fixed, Security
"""
    )
    arg_parser.add_argument(
        "--changelog", type=Path, default=Path("CHANGELOG.md"),
        help="Path to CHANGELOG.md file (default: ./CHANGELOG.md)")
    arg_parser.add_argument(
        "--category", choices=ChangelogUpdater.VALID_CATEGORIES,
        help="Category for the entry")
    arg_parser.add_argument(
        "--entry", type=str,
        help="Entry text to add to the changelog")
    arg_parser.add_argument(
        "--subcategory", type=str,
        help="Optional subcategory/subheading for the entry")
    arg_parser.add_argument(
        "--show", action="store_true",
        help="Display the current UNRELEASED section")
    opts = arg_parser.parse_args()

    # Resolve the changelog location: absolute paths are used verbatim;
    # relative paths are anchored at the repo root (nearest ancestor with
    # a .git directory, falling back to the CWD).
    if opts.changelog.is_absolute():
        changelog_path = opts.changelog
    else:
        start_dir = Path.cwd()
        repo_root = start_dir
        while repo_root.parent != repo_root:
            if (repo_root / ".git").exists():
                break
            repo_root = repo_root.parent
        else:
            repo_root = start_dir
        changelog_path = repo_root / opts.changelog

    updater = ChangelogUpdater(changelog_path)
    if not updater.read_changelog():
        return 1

    # --show is read-only: print the section and exit
    if opts.show:
        updater.display_unreleased()
        return 0

    if not opts.category or not opts.entry:
        arg_parser.error("--category and --entry are required (or use --show)")

    if not updater.add_entry(opts.category, opts.entry, opts.subcategory):
        return 1
    if not updater.write_changelog():
        return 1
    print(f"Successfully added entry to UNRELEASED section: [{opts.category}] {opts.entry}")
    return 0
# Script entry point: propagate main()'s return value as the exit code.
if __name__ == "__main__":
    sys.exit(main())

View File

@@ -0,0 +1,332 @@
#!/usr/bin/env python3
"""
Copyright (C) 2026 Moko Consulting <hello@mokoconsulting.tech>
This file is part of a Moko Consulting project.
SPDX-License-Identifier: GPL-3.0-or-later
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
FILE INFORMATION
DEFGROUP: MokoStandards.Scripts
INGROUP: MokoStandards.Validation
REPO: https://github.com/mokoconsulting-tech/MokoStandards
PATH: /scripts/validate_file_headers.py
VERSION: 05.00.00
BRIEF: Validate copyright headers and file information in repository files
"""
import os
import sys
from pathlib import Path
from typing import List, Tuple, Dict
# File extensions that require headers (extension -> language label;
# only the keys are consulted by validate_file)
HEADER_REQUIRED_EXTENSIONS = {
    '.py': 'python',
    '.php': 'php',
    '.md': 'markdown',
    '.yml': 'yaml',
    '.yaml': 'yaml',
    '.sh': 'shell',
    '.js': 'javascript',
    '.ts': 'typescript',
    '.css': 'css',
}

# Files that are exempt from header requirements (matched by exact filename)
EXEMPT_FILES = {
    'package.json',
    'package-lock.json',
    'composer.json',
    'composer.lock',
    'Gemfile.lock',
    '.gitignore',
    '.gitattributes',
    '.editorconfig',
    'LICENSE',
}

# Patterns indicating generated files (searched in the first 1000 chars)
GENERATED_PATTERNS = [
    'DO NOT EDIT',
    'AUTO-GENERATED',
    'AUTOGENERATED',
    'Generated by',
]

# Required patterns in header (searched in the first 2000 chars)
REQUIRED_HEADER_PATTERNS = [
    'Copyright (C)',
    'Moko Consulting',
    'GPL-3.0-or-later',
]

# Required file information patterns (searched in the first 2000 chars)
REQUIRED_FILE_INFO_PATTERNS = [
    'FILE INFORMATION',
    'DEFGROUP:',
    'REPO:',
    'PATH:',
    'VERSION:',
    'BRIEF:',
]

# Required markdown metadata patterns (searched in the whole file)
REQUIRED_MARKDOWN_METADATA = [
    '## Metadata',
    '## Revision History',
]
def is_exempt_file(filepath: Path) -> bool:
    """Return True when *filepath* is exempt from header requirements."""
    # Explicitly whitelisted filenames (lock files, dotfiles, LICENSE, ...)
    if filepath.name in EXEMPT_FILES:
        return True
    # Third-party trees and VCS internals are never checked
    parts = filepath.parts
    return 'vendor' in parts or 'node_modules' in parts or '.git' in parts
def is_generated_file(content: str) -> bool:
    """Return True when the file's first ~1000 chars carry a generated-file marker."""
    head = content[:1000]
    for marker in GENERATED_PATTERNS:
        if marker in head:
            return True
    return False
def check_copyright_header(content: str, filepath: Path) -> Tuple[bool, List[str]]:
    """Check that the file's leading 2000 chars contain every required
    copyright pattern.

    Returns:
        (ok, issues) where *issues* lists each missing pattern.
    """
    head = content[:2000]
    issues = [
        f"Missing required pattern: {pattern}"
        for pattern in REQUIRED_HEADER_PATTERNS
        if pattern not in head
    ]
    return not issues, issues
def check_file_information(content: str, filepath: Path) -> Tuple[bool, List[str]]:
    """Check that the file's leading 2000 chars contain the FILE INFORMATION
    block fields (DEFGROUP, REPO, PATH, VERSION, BRIEF).

    Returns:
        (ok, issues) where *issues* lists each missing field.
    """
    head = content[:2000]
    issues = [
        f"Missing required file info: {pattern}"
        for pattern in REQUIRED_FILE_INFO_PATTERNS
        if pattern not in head
    ]
    return not issues, issues
def check_markdown_metadata(content: str, filepath: Path) -> Tuple[bool, List[str]]:
    """Check that a markdown file contains the Metadata and Revision History
    sections (searched across the whole file, not just the header).

    Returns:
        (ok, issues) where *issues* lists each missing section.
    """
    issues = [
        f"Missing required section: {pattern}"
        for pattern in REQUIRED_MARKDOWN_METADATA
        if pattern not in content
    ]
    return not issues, issues
def validate_file(filepath: Path) -> Dict[str, object]:
    """Validate a single file.

    BUGFIX: the return annotation was `Dict[str, any]` -- lowercase `any` is
    the builtin function, not `typing.Any`; `object` is used instead.

    Returns:
        Result dict with keys: path (str), valid (bool), issues (list of str),
        exempt (bool), generated (bool).
    """
    result = {
        'path': str(filepath),
        'valid': True,
        'issues': [],
        'exempt': False,
        'generated': False,
    }
    # Exempt by filename/location
    if is_exempt_file(filepath):
        result['exempt'] = True
        return result
    # Exempt by extension: only listed extensions are checked
    if filepath.suffix not in HEADER_REQUIRED_EXTENSIONS:
        result['exempt'] = True
        return result
    # Read file content; an unreadable file is reported as invalid
    try:
        with open(filepath, 'r', encoding='utf-8') as f:
            content = f.read()
    except Exception as e:
        result['valid'] = False
        result['issues'].append(f"Error reading file: {e}")
        return result
    # Auto-generated files are counted separately and not validated
    if is_generated_file(content):
        result['generated'] = True
        return result
    # Check copyright header
    valid, issues = check_copyright_header(content, filepath)
    if not valid:
        result['valid'] = False
        result['issues'].extend(issues)
    # Check file information block
    valid, issues = check_file_information(content, filepath)
    if not valid:
        result['valid'] = False
        result['issues'].extend(issues)
    # Additional checks for markdown files
    if filepath.suffix == '.md':
        valid, issues = check_markdown_metadata(content, filepath)
        if not valid:
            result['valid'] = False
            result['issues'].extend(issues)
    return result
def validate_repository(repo_path: Path) -> Dict[str, object]:
    """Validate all files in repository.

    BUGFIX: the return annotation was `Dict[str, any]` -- lowercase `any` is
    the builtin function, not `typing.Any`; `object` is used instead.

    Returns:
        Aggregate dict with counters (total, validated, valid, invalid,
        exempt, generated) and the per-file result dicts under 'files'.
    """
    results = {
        'total': 0,
        'validated': 0,
        'valid': 0,
        'invalid': 0,
        'exempt': 0,
        'generated': 0,
        'files': [],
    }
    # Walk every regular file under the repository root
    for filepath in repo_path.rglob('*'):
        if not filepath.is_file():
            continue
        results['total'] += 1
        result = validate_file(filepath)
        results['files'].append(result)
        # Exempt and generated files are counted but not validated
        if result['exempt']:
            results['exempt'] += 1
        elif result['generated']:
            results['generated'] += 1
        else:
            results['validated'] += 1
            if result['valid']:
                results['valid'] += 1
            else:
                results['invalid'] += 1
    return results
def print_report(results: Dict[str, object], verbose: bool = False) -> bool:
    """Print validation report.

    BUGFIX: the parameter annotation was `Dict[str, any]` -- lowercase `any`
    is the builtin function, not `typing.Any`; `object` is used instead.
    Also adds the missing `-> bool` return annotation.

    Args:
        results: Aggregate dict produced by validate_repository().
        verbose: When True, also list files whose headers validated.

    Returns:
        True when no invalid headers were found, False otherwise.
    """
    print("=" * 70)
    print("FILE HEADER VALIDATION REPORT")
    print("=" * 70)
    print()
    print(f"Total files found: {results['total']}")
    print(f"Files validated: {results['validated']}")
    print(f"Valid headers: {results['valid']}")
    print(f"Invalid headers: {results['invalid']}")
    print(f"Exempt files: {results['exempt']}")
    print(f"Generated files: {results['generated']}")
    print()
    if results['invalid'] > 0:
        print("FILES WITH ISSUES:")
        print("-" * 70)
        for file_result in results['files']:
            if not file_result['valid'] and not file_result['exempt'] and not file_result['generated']:
                print(f"\n{file_result['path']}")
                for issue in file_result['issues']:
                    print(f"{issue}")
        print()
    if verbose and results['valid'] > 0:
        # NOTE(review): per-file 'valid' is also True for exempt/generated
        # entries, so this listing includes them -- confirm that is intended.
        print("\nVALID FILES:")
        print("-" * 70)
        for file_result in results['files']:
            if file_result['valid']:
                print(f"{file_result['path']}")
        print()
    print("=" * 70)
    if results['invalid'] > 0:
        # Guard against division by zero when nothing was validated
        compliance_rate = (results['valid'] / results['validated'] * 100) if results['validated'] > 0 else 0
        print(f"Compliance Rate: {compliance_rate:.1f}%")
        print()
        print("ACTION REQUIRED: Fix files with missing or invalid headers")
        return False
    else:
        print("✓ All validated files have proper headers")
        return True
def main():
    """Main entry point.

    Parses CLI options, runs validation over the target path, prints the
    report, and exits 1 only when --fail-on-invalid is given and invalid
    headers were found; otherwise exits 0 (report-only mode).
    """
    import argparse
    parser = argparse.ArgumentParser(
        description='Validate copyright headers and file information'
    )
    parser.add_argument(
        '--path',
        default='.',
        help='Path to repository (default: current directory)'
    )
    parser.add_argument(
        '--verbose',
        action='store_true',
        help='Show all files including valid ones'
    )
    parser.add_argument(
        '--fail-on-invalid',
        action='store_true',
        help='Exit with error code if invalid headers found'
    )
    args = parser.parse_args()
    repo_path = Path(args.path).resolve()
    if not repo_path.exists():
        print(f"Error: Path does not exist: {repo_path}", file=sys.stderr)
        sys.exit(1)
    print(f"Validating files in: {repo_path}")
    print()
    results = validate_repository(repo_path)
    success = print_report(results, args.verbose)
    if args.fail_on_invalid and not success:
        sys.exit(1)
    # BUGFIX: the original ended with `sys.exit(0 if success else 0)`, a dead
    # conditional that always exited 0. Without --fail-on-invalid the script
    # is report-only, so a plain success exit is the intended behavior.
    sys.exit(0)
# Script entry point: main() handles its own sys.exit() calls.
if __name__ == "__main__":
    main()