diff --git a/scripts/validate/no_secrets.py b/scripts/validate/no_secrets.py new file mode 100644 index 0000000..39e0f29 --- /dev/null +++ b/scripts/validate/no_secrets.py @@ -0,0 +1,212 @@ +#!/usr/bin/env python3 +""" +Scan for accidentally committed secrets and credentials. + +Copyright (C) 2025 Moko Consulting + +This file is part of a Moko Consulting project. + +SPDX-License-Identifier: GPL-3.0-or-later + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program (./LICENSE.md). + +FILE INFORMATION +DEFGROUP: Script.Validate +INGROUP: Security +REPO: https://github.com/mokoconsulting-tech/moko-cassiopeia +PATH: /scripts/validate/no_secrets.py +VERSION: 01.00.00 +BRIEF: Scan for accidentally committed secrets and credentials +NOTE: High-signal pattern detection to prevent credential exposure +""" + +import argparse +import json +import os +import re +import sys +from pathlib import Path +from typing import List, Dict + +# Add lib directory to path +sys.path.insert(0, str(Path(__file__).parent.parent / "lib")) + +try: + import common +except ImportError: + print("ERROR: Cannot import required libraries", file=sys.stderr) + sys.exit(1) + + +# High-signal patterns only. Any match is a hard fail. 
# Regexes for credential formats that are distinctive enough to treat any
# match as a hard failure.
SECRET_PATTERNS = [
    # Private keys: per-algorithm PEM headers plus the bare PKCS#8 header
    # ("-----BEGIN PRIVATE KEY-----"), which carries no algorithm prefix.
    r'-----BEGIN (?:(?:RSA|DSA|EC|OPENSSH) )?PRIVATE KEY-----',
    r'PuTTY-User-Key-File-',
    # AWS keys
    r'AKIA[0-9A-Z]{16}',
    r'ASIA[0-9A-Z]{16}',
    # GitHub tokens
    r'ghp_[A-Za-z0-9]{36}',
    r'gho_[A-Za-z0-9]{36}',
    r'github_pat_[A-Za-z0-9_]{20,}',
    # Slack tokens
    r'xox[baprs]-[0-9A-Za-z-]{10,48}',
    # Stripe keys
    r'sk_live_[0-9a-zA-Z]{20,}',
]

# Directory names (below the scan root) that are never scanned
EXCLUDE_DIRS = {
    'vendor',
    'node_modules',
    'dist',
    'build',
    '.git',
}


def scan_file(filepath: Path, patterns: List[re.Pattern]) -> List[Dict[str, object]]:
    """
    Scan a file for secret patterns.

    Each offending line is reported once, even when several patterns
    match it.

    Args:
        filepath: Path to file to scan
        patterns: Compiled regex patterns to search for

    Returns:
        List of dicts with keys 'file' (str), 'line' (1-based int) and
        'content' (the stripped line, truncated to 100 characters)
    """
    hits = []

    try:
        # errors='ignore' drops undecodable bytes so odd encodings cannot
        # abort the scan of an otherwise readable file.
        with open(filepath, 'r', encoding='utf-8', errors='ignore') as f:
            for line_num, line in enumerate(f, 1):
                if any(pattern.search(line) for pattern in patterns):
                    hits.append({
                        'file': str(filepath),
                        'line': line_num,
                        'content': line.strip()[:100]  # Limit to 100 chars
                    })
    except OSError as e:
        common.log_warn(f"Could not read {filepath}: {e}")

    return hits


def _is_probably_binary(filepath: Path) -> bool:
    """Return True when the first 1024 bytes of the file contain a NUL byte."""
    with open(filepath, 'rb') as f:
        return b'\x00' in f.read(1024)


def scan_directory(src_dir: str, patterns: List[re.Pattern]) -> List[Dict[str, object]]:
    """
    Recursively scan directory for secrets.

    Directories named in EXCLUDE_DIRS are pruned below ``src_dir`` only.
    Components of ``src_dir`` itself are never tested, so a scan root that
    lives under (or is itself called) e.g. ``build`` is still scanned.
    Directory and file names are visited in sorted order so the result
    order is deterministic.

    Args:
        src_dir: Directory to scan
        patterns: Compiled regex patterns

    Returns:
        List of all matches, in the format produced by scan_file()
    """
    all_hits = []

    for root, dirnames, filenames in os.walk(src_dir):
        # Assigning to the slice prunes in place, so os.walk never descends
        # into excluded trees such as node_modules or .git.
        dirnames[:] = sorted(d for d in dirnames if d not in EXCLUDE_DIRS)

        for filename in sorted(filenames):
            item = Path(root) / filename

            # Skip anything that is not a regular file (e.g. broken symlinks)
            if not item.is_file():
                continue

            # Skip binary files (heuristic)
            try:
                if _is_probably_binary(item):
                    continue
            except OSError as e:
                common.log_warn(f"Could not read {item}: {e}")
                continue

            all_hits.extend(scan_file(item, patterns))

    return all_hits


def main() -> int:
    """
    Main entry point.

    Returns:
        0 when no secret pattern is found, 1 when the source directory is
        missing or at least one pattern matches
    """
    parser = argparse.ArgumentParser(
        description="Scan for accidentally committed secrets and credentials"
    )
    parser.add_argument(
        "-s", "--src-dir",
        default=os.environ.get("SRC_DIR", "src"),
        help="Source directory to scan (default: src)"
    )

    args = parser.parse_args()

    # Check if source directory exists
    if not Path(args.src_dir).is_dir():
        result = {
            "status": "fail",
            "error": "src directory missing"
        }
        common.json_output(result)
        return 1

    # Compile patterns once, outside the per-line loop
    compiled_patterns = [re.compile(pattern) for pattern in SECRET_PATTERNS]

    # Scan directory
    hits = scan_directory(args.src_dir, compiled_patterns)

    if hits:
        # Take the count before truncating so the summary reports the real
        # number of findings, not the size of the capped list.
        total = len(hits)
        reported = hits[:50]  # Limit machine-readable output to 50 hits

        result = {
            "status": "fail",
            "error": "secret_pattern_detected",
            "hits": [{"hit": f"{h['file']}:{h['line']}: {h['content']}"} for h in reported]
        }

        # NOTE(review): the other exit paths emit JSON via common.json_output;
        # this one prints directly. Kept as-is; confirm whether that is intended.
        print(json.dumps(result))

        # Also print human-readable output
        print("\nERROR: Potential secrets detected!", file=sys.stderr)
        print(f"\nFound {total} potential secret(s):", file=sys.stderr)
        for hit in reported[:10]:  # Show first 10 in detail
            print(f" {hit['file']}:{hit['line']}", file=sys.stderr)
            print(f" {hit['content']}", file=sys.stderr)

        if total > 10:
            print(f" ... and {total - 10} more", file=sys.stderr)

        print("\nPlease remove any secrets and use environment variables or secret management instead.", file=sys.stderr)

        return 1

    result = {
        "status": "ok",
        "src_dir": args.src_dir
    }
    common.json_output(result)
    print("no_secrets: ok")
    return 0


if __name__ == "__main__":
    sys.exit(main())
def get_yaml_files() -> List[str]:
    """
    Get list of YAML files tracked by git.

    Uses ``git ls-files -z`` so paths are NUL-separated and never
    C-quoted; newline-delimited output quotes paths containing unusual
    characters, and such quoted names cannot be opened afterwards.

    Returns:
        List of YAML file paths, or an empty list when the git command
        exits with a non-zero status
    """
    try:
        # Assumes common.run_command returns an object whose stdout is text,
        # as the newline-splitting original did -- TODO confirm.
        result = common.run_command(
            ["git", "ls-files", "-z", "--", "*.yml", "*.yaml"],
            capture_output=True,
            check=True
        )
    except subprocess.CalledProcessError:
        return []

    # The output ends with a trailing NUL, which yields one empty entry
    return [name for name in result.stdout.split('\0') if name]


def check_tabs_in_file(filepath: str) -> List[Tuple[int, str]]:
    """
    Check for tab characters in a file.

    Args:
        filepath: Path to file to check

    Returns:
        List of (line_number, line_content) tuples for every line that
        contains a tab; line numbers are 1-based and the content has
        trailing whitespace removed. A file that cannot be read produces
        a warning and an empty list.
    """
    tabs_found = []

    try:
        with open(filepath, 'r', encoding='utf-8', errors='ignore') as f:
            for line_num, line in enumerate(f, 1):
                if '\t' in line:
                    tabs_found.append((line_num, line.rstrip()))
    except OSError as e:
        common.log_warn(f"Could not read {filepath}: {e}")

    return tabs_found


def main() -> int:
    """
    Main entry point.

    Returns:
        0 when there are no YAML files or none contain tabs, 2 when at
        least one tracked YAML file contains a tab
    """
    yaml_files = get_yaml_files()

    if not yaml_files:
        print("No files to check")
        return 0

    bad_files = []

    for filepath in yaml_files:
        tabs = check_tabs_in_file(filepath)
        if not tabs:
            continue

        bad_files.append(filepath)

        print(f"TAB found in {filepath}", file=sys.stderr)
        print(" Lines with tabs:", file=sys.stderr)

        # Show first 5 lines with tabs
        for line_num, line_content in tabs[:5]:
            print(f" {line_num}: {line_content[:80]}", file=sys.stderr)

        if len(tabs) > 5:
            print(f" ... and {len(tabs) - 5} more", file=sys.stderr)
        print("", file=sys.stderr)

    if bad_files:
        print("", file=sys.stderr)
        print("ERROR: Tabs found in repository files", file=sys.stderr)
        print("", file=sys.stderr)
        print("YAML specification forbids tab characters.", file=sys.stderr)
        print(f"Found tabs in {len(bad_files)} file(s):", file=sys.stderr)
        for f in bad_files:
            print(f" - {f}", file=sys.stderr)
        print("", file=sys.stderr)
        print("To fix:", file=sys.stderr)
        print(" 1. Run: python3 scripts/fix/tabs.py", file=sys.stderr)
        print(" 2. Or manually replace tabs with spaces in your editor", file=sys.stderr)
        print(" 3. Configure your editor to use spaces (not tabs) for YAML files", file=sys.stderr)
        print("", file=sys.stderr)
        return 2

    print("tabs: ok")
    return 0


if __name__ == "__main__":
    sys.exit(main())