<?php
/**
 * Copyright (C) 2026 Moko Consulting <hello@mokoconsulting.tech>
 *
 * This file is part of a Moko Consulting project.
 *
 * SPDX-License-Identifier: GPL-3.0-or-later
 *
 * FILE INFORMATION
 * DEFGROUP: MokoStandards.Enterprise
 * INGROUP: MokoStandards
 * REPO: https://git.mokoconsulting.tech/MokoConsulting/moko-platform
 * PATH: /lib/Enterprise/DefinitionParser.php
 * BRIEF: Parses Terraform HCL repository definition files into a flat sync-file list
 */

declare(strict_types=1);

namespace MokoEnterprise;

/**
 * Definition Parser
 *
 * Parses the Terraform HCL repository definition files stored in
 * definitions/default/ and returns a flat list of file sync entries.
 *
 * This is a purpose-built scanner, not a general HCL parser: it locates the
 * `root_files` and `directories` arrays, splits them into `{ … }` blocks and
 * reads a fixed set of attributes from each block with regular expressions.
 * Heredoc regions are skipped during structural scanning and attribute
 * lookup so that template text cannot be mistaken for HCL structure.
 *
 * File blocks that carry either a `template` field (external file path) or a
 * `stub_content` value (heredoc or quoted string) are returned — these are the
 * files that the bulk-sync process should push to remote repositories.
 *
 * When both `stub_content` and `template` are present in the same block,
 * `stub_content` takes priority (the definition file is authoritative).
 *
 * Each returned entry is an associative array with one of two shapes:
 *
 * External-file entry (legacy, uses `template` path):
 *   'source'           => string  — value of the `template` attribute
 *   'destination'      => string  — path in the target repository
 *   'always_overwrite' => bool    — value of `always_overwrite` (default true)
 *   'protected'        => bool    — value of `protected` (default false)
 *
 * Inline-content entry (uses `stub_content`):
 *   'inline_content'   => string  — content taken from the definition file
 *   'destination'      => string  — path in the target repository
 *   'always_overwrite' => bool    — value of `always_overwrite` (default true)
 *   'protected'        => bool    — value of `protected` (default false)
 */
class DefinitionParser
{
	/** Map platform slug → definition file basename */
	private const PLATFORM_DEFINITION_MAP = [
		'crm-module'          => 'crm-module.tf',
		'waas-component'      => 'waas-component.tf',
		'generic-repository'  => 'generic-repository.tf',
		'default-repository'  => 'default-repository.tf',
		'standards'           => 'standards-repository.tf',
	];

	/** Default definition used when platform has no specific file */
	private const FALLBACK_DEFINITION = 'default-repository.tf';

	/** Directory containing the base definition files */
	private const DEFINITIONS_DIR = 'definitions/default';

	// -----------------------------------------------------------------------
	// Public API
	// -----------------------------------------------------------------------

	/**
	 * Parse a definition file by platform slug.
	 *
	 * Unknown slugs, and slugs whose definition file is missing, fall back to
	 * the default definition. An empty list is returned when neither file
	 * exists.
	 *
	 * @param string $platform  e.g. 'crm-module', 'waas-component'
	 * @param string $repoRoot  Absolute path to the MokoStandards repository root
	 * @return array<int, array{source?: string, inline_content?: string, destination: string, always_overwrite: bool, protected: bool}>
	 */
	public function parseForPlatform(string $platform, string $repoRoot): array
	{
		$definitionsDir = rtrim($repoRoot, '/') . '/' . self::DEFINITIONS_DIR . '/';
		$preferred      = self::PLATFORM_DEFINITION_MAP[$platform] ?? self::FALLBACK_DEFINITION;

		// Try the platform-specific file first, then the shared fallback.
		foreach (array_unique([$preferred, self::FALLBACK_DEFINITION]) as $basename) {
			$path = $definitionsDir . $basename;
			if (is_file($path)) {
				return $this->parseFile($path);
			}
		}

		return [];
	}

	/**
	 * Parse a definition file at an explicit filesystem path.
	 *
	 * Returns an empty list when the path is not a readable regular file.
	 *
	 * @param string $filePath  Absolute path to the .tf definition file
	 * @return array<int, array{source?: string, inline_content?: string, destination: string, always_overwrite: bool, protected: bool}>
	 */
	public function parseFile(string $filePath): array
	{
		// is_file() rather than file_exists(): a directory must not reach
		// file_get_contents(), which would emit a warning.
		if (!is_file($filePath) || !is_readable($filePath)) {
			return [];
		}

		$content = file_get_contents($filePath);
		if ($content === false) {
			return [];
		}

		return $this->parse($content);
	}

	/**
	 * Parse raw HCL content.
	 *
	 * Entries from `root_files` come first, followed by entries from
	 * `directories` in document order (a directory's own files before those
	 * of its subdirectories).
	 *
	 * @param string $content  Raw .tf file content
	 * @return array<int, array{source?: string, inline_content?: string, destination: string, always_overwrite: bool, protected: bool}>
	 */
	public function parse(string $content): array
	{
		$entries = [];

		// root_files = [ { ... }, ... ]
		$rootFilesContent = $this->extractNamedArray($content, 'root_files');
		if ($rootFilesContent !== null) {
			$entries = array_merge($entries, $this->parseFileBlocks($rootFilesContent, ''));
		}

		// directories = [ { ... }, ... ]
		$dirsContent = $this->extractNamedArray($content, 'directories');
		if ($dirsContent !== null) {
			$entries = array_merge($entries, $this->parseDirectories($dirsContent));
		}

		return $entries;
	}

	// -----------------------------------------------------------------------
	// Internal parsing helpers
	// -----------------------------------------------------------------------

	/**
	 * Locate `name = [` inside $content and return the content between that
	 * `[` and its matching `]`, or null if not found.
	 *
	 * Occurrences inside heredoc regions are never matched. When
	 * $topLevelOnly is true, occurrences nested inside any `[ … ]` or `{ … }`
	 * pair of $content are ignored as well, so that e.g. the `files` array of
	 * a subdirectory is not mistaken for the `files` array of its parent.
	 *
	 * @param string $content       Text to search
	 * @param string $name          Attribute name (matched on a word boundary)
	 * @param bool   $topLevelOnly  Restrict the search to nesting depth 0 of $content
	 */
	private function extractNamedArray(string $content, string $name, bool $topLevelOnly = false): ?string
	{
		$pattern = '/\b' . preg_quote($name, '/') . '\s*=\s*\[/';

		// Match against a same-length masked copy so offsets map 1:1 onto $content.
		$masked = $this->maskIgnoredRegions($content, $topLevelOnly);
		if (preg_match($pattern, $masked, $match, PREG_OFFSET_CAPTURE) !== 1) {
			return null;
		}

		// The `[` is the last byte of the match; extract from the original content.
		$openPos = $match[0][1] + strlen($match[0][0]) - 1;

		return $this->extractBetweenPair($content, $openPos, '[', ']');
	}

	/**
	 * Return a copy of $content, of identical byte length, in which regions
	 * that must not take part in attribute matching are overwritten with
	 * spaces. Newlines are preserved.
	 *
	 * Heredoc regions (from `<<` through the terminator line) are always
	 * blanked. When $topLevelOnly is true, everything strictly inside a
	 * `[ … ]` or `{ … }` pair is blanked too; the outermost opening and
	 * closing brackets themselves are kept so that patterns such as
	 * `files = [` still match at depth 0.
	 */
	private function maskIgnoredRegions(string $content, bool $topLevelOnly = false): string
	{
		$masked = $content;
		$len    = strlen($content);
		$depth  = 0;
		$i      = 0;

		while ($i < $len) {
			$char = $content[$i];

			if ($char === '<' && isset($content[$i + 1]) && $content[$i + 1] === '<') {
				$end = $this->skipHeredoc($content, $i, $len);
				for ($k = $i; $k < $end; $k++) {
					if ($content[$k] !== "\n") {
						$masked[$k] = ' ';
					}
				}
				$i = $end;
				continue;
			}

			if ($topLevelOnly) {
				if ($char === '[' || $char === '{') {
					if ($depth > 0) {
						$masked[$i] = ' ';
					}
					$depth++;
				} elseif ($char === ']' || $char === '}') {
					// Clamp at zero so a stray closer cannot hide later content.
					if ($depth > 0) {
						$depth--;
					}
					if ($depth > 0) {
						$masked[$i] = ' ';
					}
				} elseif ($depth > 0 && $char !== "\n") {
					$masked[$i] = ' ';
				}
			}

			$i++;
		}

		return $masked;
	}

	/**
	 * Match $pattern against $masked and return the text of capture group 1
	 * as it appears in $original, or null when the pattern does not match.
	 *
	 * $masked must be a same-length masked copy of $original (see
	 * maskIgnoredRegions()); reading the value back from $original keeps it
	 * intact even if part of it was blanked in the masked copy.
	 */
	private function findAttribute(string $pattern, string $masked, string $original): ?string
	{
		if (preg_match($pattern, $masked, $match, PREG_OFFSET_CAPTURE) !== 1) {
			return null;
		}

		[$text, $offset] = $match[1];

		return substr($original, $offset, strlen($text));
	}

	/**
	 * Starting at $pos (which must hold $open), walk forward counting depth
	 * until the matching $close is found.  Returns the content between them
	 * (exclusive), or null on malformed input.
	 */
	private function extractBetweenPair(string $content, int $pos, string $open, string $close): ?string
	{
		if (!isset($content[$pos]) || $content[$pos] !== $open) {
			return null;
		}

		$depth = 0;
		$len   = strlen($content);

		for ($i = $pos; $i < $len; $i++) {
			// Skip heredoc regions — they may contain unbalanced brackets
			if ($content[$i] === '<' && isset($content[$i + 1]) && $content[$i + 1] === '<') {
				$i = $this->skipHeredoc($content, $i, $len) - 1; // -1 because the for loop increments
				continue;
			}

			if ($content[$i] === $open) {
				$depth++;
			} elseif ($content[$i] === $close) {
				$depth--;
				if ($depth === 0) {
					return substr($content, $pos + 1, $i - $pos - 1);
				}
			}
		}

		return null; // unterminated
	}

	/**
	 * Split $content into top-level `{ … }` blocks (depth 1 only).
	 *
	 * Heredoc sections (`<<-WORD … WORD` and `<<WORD … WORD`) are skipped in
	 * their entirety so that any `{` or `}` characters inside template content
	 * do not corrupt the brace-depth counter.  A closing brace with no
	 * matching opener is ignored rather than driving the depth negative.
	 *
	 * @return string[]  Each element is the inner content of one block (without outer braces)
	 */
	private function splitBlocks(string $content): array
	{
		$blocks = [];
		$depth  = 0;
		$start  = null;
		$len    = strlen($content);
		$i      = 0;

		while ($i < $len) {
			// Detect heredoc: <<WORD or <<-WORD
			if ($content[$i] === '<' && isset($content[$i + 1]) && $content[$i + 1] === '<') {
				$i = $this->skipHeredoc($content, $i, $len);
				continue;
			}

			if ($content[$i] === '{') {
				if ($depth === 0) {
					$start = $i;
				}
				$depth++;
			} elseif ($content[$i] === '}' && $depth > 0) {
				$depth--;
				if ($depth === 0 && $start !== null) {
					$blocks[] = substr($content, $start + 1, $i - $start - 1);
					$start = null;
				}
			}
			$i++;
		}

		return $blocks;
	}

	/**
	 * Advance past a HCL heredoc starting at position $i.
	 *
	 * Supports both `<<WORD` (content-preserving) and `<<-WORD`
	 * (indent-stripping) forms.  Returns the index immediately after the
	 * closing delimiter line (never greater than $len), $i + 2 when no
	 * delimiter identifier follows the `<<`, or $len when the heredoc is
	 * unterminated.
	 */
	private function skipHeredoc(string $content, int $i, int $len): int
	{
		$j = $i + 2; // skip <<

		// Optional indent-strip marker
		$stripIndent = false;
		if (isset($content[$j]) && $content[$j] === '-') {
			$stripIndent = true;
			$j++;
		}

		// Read the delimiter identifier (alphanumeric + underscore)
		$delimiter = '';
		while ($j < $len && (ctype_alnum($content[$j]) || $content[$j] === '_')) {
			$delimiter .= $content[$j];
			$j++;
		}

		if ($delimiter === '') {
			return $i + 2; // Not a real heredoc
		}

		// Skip the rest of the opening line, including its newline
		while ($j < $len && $content[$j] !== "\n") {
			$j++;
		}
		if ($j < $len) {
			$j++;
		}

		// Scan line by line until the closing delimiter
		while ($j < $len) {
			$lineEnd = strpos($content, "\n", $j);
			$lineEnd = ($lineEnd === false) ? $len : $lineEnd;

			$line = substr($content, $j, $lineEnd - $j);
			// For <<- the terminator may itself be indented, so trim both sides.
			// For << the terminator must start at column 0; only trailing
			// whitespace/CR is trimmed, to tolerate Windows line endings.
			$normalised = $stripIndent ? trim($line) : rtrim($line);
			if ($normalised === $delimiter) {
				// Clamp: the terminator may be the last line without a trailing newline.
				return min($lineEnd + 1, $len);
			}
			$j = $lineEnd + 1;
		}

		return $len; // unterminated heredoc — consume to EOF
	}

	/**
	 * Parse all file blocks inside a `files = [ … ]` (or `root_files`) array,
	 * returning only those that define `stub_content` or `template`.
	 *
	 * @param string $arrayContent  Inner content between the outer `[` and `]`
	 * @param string $dirPath       Directory prefix for the destination ('' = repo root)
	 * @return array<int, array{source?: string, inline_content?: string, destination: string, always_overwrite: bool, protected: bool}>
	 */
	private function parseFileBlocks(string $arrayContent, string $dirPath): array
	{
		$entries = [];
		foreach ($this->splitBlocks($arrayContent) as $block) {
			$entry = $this->parseFileBlock($block, $dirPath);
			if ($entry !== null) {
				$entries[] = $entry;
			}
		}
		return $entries;
	}

	/**
	 * Parse a single file block `{ name = "…", template = "…", … }` or
	 * `{ name = "…", stub_content = <<-EOT … EOT, … }`.
	 *
	 * Content priority: `stub_content` heredoc, then `stub_content` quoted
	 * string (C-style escapes are decoded), then `template` path.  Returns
	 * null when the block has none of these (structural-only entry) or when
	 * it has no `name`.
	 *
	 * Attribute lookups run against a heredoc-masked copy of the block, so
	 * text inside `stub_content` that happens to look like `name = "…"` or
	 * `always_overwrite = false` cannot override the real attributes.
	 *
	 * @return array{source?: string, inline_content?: string, destination: string, always_overwrite: bool, protected: bool}|null
	 */
	private function parseFileBlock(string $block, string $dirPath): ?array
	{
		$masked = $this->maskIgnoredRegions($block);

		// --- try stub_content heredoc first (preferred) ---
		$inlineContent = $this->extractHeredoc($block, 'stub_content');

		// --- fall back to stub_content as a quoted string (e.g. "line1\nline2") ---
		if ($inlineContent === null) {
			$quoted = $this->findAttribute('/\bstub_content\s*=\s*"((?:[^"\\\\]|\\\\.)*)"/', $masked, $block);
			if ($quoted !== null) {
				$inlineContent = stripcslashes($quoted);
			}
		}

		// --- fall back to external template path ---
		$source = null;
		if ($inlineContent === null) {
			$source = $this->findAttribute('/\btemplate\s*=\s*"([^"]+)"/', $masked, $block);
			if ($source === null) {
				return null; // neither inline content nor template → structural entry
			}
		}

		// name is required, even when destination_filename overrides it below
		$filename = $this->findAttribute('/\bname\s*=\s*"([^"]+)"/', $masked, $block);
		if ($filename === null) {
			return null;
		}
		$filename = $this->findAttribute('/\bdestination_filename\s*=\s*"([^"]+)"/', $masked, $block) ?? $filename;

		// destination_path overrides dirPath; '' and '.' both mean the repo root
		$destinationPath = $this->findAttribute('/\bdestination_path\s*=\s*"([^"]+)"/', $masked, $block);
		if ($destinationPath !== null) {
			$prefix = trim($destinationPath, '/');
			if ($prefix === '.') {
				$prefix = '';
			}
		} else {
			$prefix = $dirPath;
		}
		$destination = ($prefix === '') ? $filename : "{$prefix}/{$filename}";

		// always_overwrite defaults to true; protected defaults to false
		$alwaysOverwrite = $this->findAttribute('/\balways_overwrite\s*=\s*(true|false)\b/', $masked, $block) !== 'false';
		$protected       = $this->findAttribute('/\bprotected\s*=\s*(true|false)\b/', $masked, $block) === 'true';

		$entry = ($inlineContent !== null)
			? ['inline_content' => $inlineContent]
			: ['source' => $source];

		return $entry + [
			'destination'      => $destination,
			'always_overwrite' => $alwaysOverwrite,
			'protected'        => $protected,
		];
	}

	/**
	 * Extract a heredoc value for the given field name from a block string.
	 *
	 * Handles both `<<WORD` (content-preserving) and `<<-WORD`
	 * (indent-stripping) forms.  For the `<<-` form the smallest leading
	 * tab/space run found on any non-blank line is removed from every line
	 * that is at least that long.
	 *
	 * Returns null when the field is not found, and '' for a heredoc whose
	 * terminator immediately follows the opening line.
	 */
	private function extractHeredoc(string $block, string $field): ?string
	{
		// The content group is optional (lazily) so that an empty heredoc —
		// opening line directly followed by the terminator — still matches.
		$pattern = '/\b' . preg_quote($field, '/')
			. '\s*=\s*<<(-?)(\w+)[ \t]*\r?\n(?:(.*?)\r?\n)??[ \t]*\2[ \t]*(?:\r?\n|$)/s';
		if (preg_match($pattern, $block, $m) !== 1) {
			return null;
		}

		$rawContent = $m[3] ?? '';
		if ($m[1] !== '-') {
			return $rawContent;
		}

		// Determine the minimum leading-whitespace prefix across non-blank lines
		$lines     = explode("\n", $rawContent);
		$minIndent = PHP_INT_MAX;
		foreach ($lines as $line) {
			if (trim($line) === '') {
				continue;
			}
			$minIndent = min($minIndent, strlen($line) - strlen(ltrim($line, " \t")));
		}
		if ($minIndent === PHP_INT_MAX) {
			$minIndent = 0;
		}

		// Strip that many characters from the start of each sufficiently long line
		$lines = array_map(
			static fn(string $l): string => (strlen($l) >= $minIndent) ? substr($l, $minIndent) : $l,
			$lines
		);

		return implode("\n", $lines);
	}

	/**
	 * Walk the `directories = [ … ]` array, descending into every
	 * `subdirectories` block recursively.
	 *
	 * @return array<int, array{source?: string, inline_content?: string, destination: string, always_overwrite: bool, protected: bool}>
	 */
	private function parseDirectories(string $dirsArrayContent): array
	{
		$entries = [];
		foreach ($this->splitBlocks($dirsArrayContent) as $block) {
			$entries = array_merge($entries, $this->parseDirectoryBlock($block));
		}
		return $entries;
	}

	/**
	 * Process one directory block: extract its path, parse its files, and
	 * recurse into any subdirectories.
	 *
	 * `path`, `files` and `subdirectories` are looked up at the top level of
	 * the block only.  Without that restriction a directory that has no
	 * `files` of its own would pick up the `files` array of its first
	 * subdirectory and emit those files under the parent path, in addition to
	 * the correct entries produced by the recursion.
	 *
	 * The `path` value is used verbatim as the destination prefix; it is not
	 * joined with the parent directory's path.
	 *
	 * @return array<int, array{source?: string, inline_content?: string, destination: string, always_overwrite: bool, protected: bool}>
	 */
	private function parseDirectoryBlock(string $block): array
	{
		$entries = [];

		// Path prefix for files inside this directory ('' when not declared)
		$topLevel = $this->maskIgnoredRegions($block, true);
		$dirPath  = $this->findAttribute('/\bpath\s*=\s*"([^"]+)"/', $topLevel, $block) ?? '';

		// files = [ … ] belonging to this directory itself
		$filesContent = $this->extractNamedArray($block, 'files', true);
		if ($filesContent !== null) {
			$entries = array_merge($entries, $this->parseFileBlocks($filesContent, $dirPath));
		}

		// subdirectories = [ … ] — recurse
		$subdirsContent = $this->extractNamedArray($block, 'subdirectories', true);
		if ($subdirsContent !== null) {
			foreach ($this->splitBlocks($subdirsContent) as $subBlock) {
				$entries = array_merge($entries, $this->parseDirectoryBlock($subBlock));
			}
		}

		return $entries;
	}
}