Files
MokoSuiteOpenGraph/source/packages/com_mokoog/src/Controller/BatchController.php
T
jmiller 7532446e46
Generic: Project CI / Lint & Validate (pull_request) Successful in 13s
Universal: PR Check / Branch Policy (pull_request) Successful in 1s
Universal: PR Check / Secret Scan (pull_request) Successful in 6s
Universal: PR Check / Validate PR (pull_request) Failing after 5s
Branch Cleanup / Delete merged branch (pull_request) Failing after 1s
RC Revert / Rename rc/ back to dev/ (pull_request) Has been skipped
Joomla: Metadata Validation / Validate Joomla Metadata (pull_request) Failing after 10s
Generic: Project CI / Tests (pull_request) Has been cancelled
Universal: PR Check / Build RC Package (pull_request) Has been cancelled
Universal: PR Check / Report Issues (pull_request) Has been cancelled
refactor: sitemap throttle + SEF URLs (#100) and batch cursor pagination (#106)
#106 — BatchController now paginates by id cursor (WHERE c.id > :lastId)
instead of always querying offset 0. A row that fails to insert falls behind
the cursor and is not re-fetched, so the batch always terminates and reaches
100% even with persistent failures. process() returns examined + last_id; the
editor JS drives the cursor and stops when a chunk examines 0 rows.

#100 — Sitemap:
- Throttle: regenerate at most once per 60s on content save (SITEMAP_MIN_INTERVAL)
  so bulk edits/imports don't rebuild the whole file every save
- SEF URLs: route each article via Route::link('site', ...) with a fallback to
  the non-SEF index.php URL if routing fails (worst case = prior behavior)
(access-level filtering + atomic write were done earlier in the cycle)
2026-06-29 12:17:37 -05:00

202 lines
6.8 KiB
PHP

<?php
/**
* @package MokoSuiteOpenGraph
* @subpackage com_mokoog
* @author Moko Consulting <hello@mokoconsulting.tech>
* @copyright Copyright (C) 2026 Moko Consulting. All rights reserved.
* @license GNU General Public License version 3 or later; see LICENSE
*/
namespace Joomla\Component\MokoOG\Administrator\Controller;
defined('_JEXEC') or die;
use Joomla\CMS\Factory;
use Joomla\CMS\Language\Text;
use Joomla\CMS\MVC\Controller\BaseController;
use Joomla\CMS\Response\JsonResponse;
use Joomla\CMS\Session\Session;
class BatchController extends BaseController
{
/**
 * Report how many articles are still waiting for Open Graph tags.
 *
 * An article is counted when it is published (state = 1) in #__content and has
 * no matching com_content row in #__mokoog_tags. The figure is echoed as a
 * JsonResponse whose data carries a `total` key, then the application is closed.
 *
 * @return void
 *
 * @throws \RuntimeException With code 403 when the GET token check fails or the
 *                           user holds neither mokoog.batch nor core.create.
 */
public function count(): void
{
    if (!Session::checkToken('get')) {
        throw new \RuntimeException(Text::_('JINVALID_TOKEN'), 403);
    }

    $app  = Factory::getApplication();
    $user = $app->getIdentity();

    // Either the component-specific batch permission or generic create rights suffice.
    $mayBatch = $user->authorise('mokoog.batch', 'com_mokoog')
        || $user->authorise('core.create', 'com_mokoog');

    if (!$mayBatch) {
        throw new \RuntimeException(Text::_('JLIB_APPLICATION_ERROR_ACCESS_FORBIDDEN'), 403);
    }

    $db = Factory::getContainer()->get(\Joomla\Database\DatabaseInterface::class);

    // Join target and ON clause: tag rows that belong to this com_content article.
    $tagsJoin = $db->quoteName('#__mokoog_tags', 't')
        . ' ON ' . $db->quoteName('t.content_type') . ' = ' . $db->quote('com_content')
        . ' AND ' . $db->quoteName('t.content_id') . ' = ' . $db->quoteName('c.id');

    $query = $db->getQuery(true);
    $query->select('COUNT(*)');
    $query->from($db->quoteName('#__content', 'c'));
    $query->leftJoin($tagsJoin);
    $query->where($db->quoteName('c.state') . ' = 1');
    // No joined tag row means the article has not been processed yet.
    $query->where($db->quoteName('t.id') . ' IS NULL');

    $db->setQuery($query);
    $pending = (int) $db->loadResult();

    echo new JsonResponse(['total' => $pending]);
    $app->close();
}
/**
 * Generate Open Graph tag rows for the next chunk of untagged articles.
 *
 * Request parameters (read from the application input):
 *  - `limit`  chunk size, default 50, clamped to the range 1..200
 *  - `lastid` id cursor; only articles with a greater id are examined (default 0)
 *
 * The response is a JsonResponse whose data contains:
 *  - `created`  rows inserted into #__mokoog_tags
 *  - `skipped`  articles whose insert threw and was logged
 *  - `examined` number of articles fetched for this chunk
 *  - `last_id`  id of the last article examined (echoes `lastid` when none were)
 *
 * NOTE(review): this endpoint writes to the database but validates the token
 * from the GET request; confirm that is intentional for the editor JS client.
 *
 * @return void
 *
 * @throws \RuntimeException With code 403 when the GET token check fails or the
 *                           user holds neither mokoog.batch nor core.create.
 */
public function process(): void
{
    Session::checkToken('get') || throw new \RuntimeException(Text::_('JINVALID_TOKEN'), 403);

    $app      = Factory::getApplication();
    $identity = $app->getIdentity();

    if (!$identity->authorise('mokoog.batch', 'com_mokoog')
        && !$identity->authorise('core.create', 'com_mokoog')) {
        throw new \RuntimeException(Text::_('JLIB_APPLICATION_ERROR_ACCESS_FORBIDDEN'), 403);
    }

    $input = $app->getInput();

    // Clamp the chunk size to 1..200. The lower bound matters: setQuery() treats
    // a limit of 0 (or less) as "no limit", so an unclamped `limit=0` would load
    // every untagged article in a single request and bypass the 200-row cap.
    $limit  = max(1, min($input->getInt('limit', 50), 200));
    $lastId = max(0, $input->getInt('lastid', 0));

    $db = Factory::getContainer()->get(\Joomla\Database\DatabaseInterface::class);

    // Cursor-based pagination by id: each chunk fetches the next articles whose
    // id is greater than the previous chunk's highest id. A row that fails to
    // insert is passed over on the next chunk (its id is already behind the
    // cursor) instead of being re-fetched forever, so the batch always reaches
    // the end. The client stops when a chunk examines 0 rows.
    $query = $db->getQuery(true)
        ->select($db->quoteName([
            'c.id', 'c.title', 'c.metadesc', 'c.introtext', 'c.fulltext', 'c.images',
        ]))
        ->from($db->quoteName('#__content', 'c'))
        ->leftJoin(
            $db->quoteName('#__mokoog_tags', 't')
            . ' ON ' . $db->quoteName('t.content_type') . ' = ' . $db->quote('com_content')
            . ' AND ' . $db->quoteName('t.content_id') . ' = ' . $db->quoteName('c.id')
        )
        ->where($db->quoteName('c.state') . ' = 1')
        ->where($db->quoteName('t.id') . ' IS NULL')
        // $lastId is an integer (getInt + max), so concatenation is injection-safe.
        ->where($db->quoteName('c.id') . ' > ' . (int) $lastId)
        ->order($db->quoteName('c.id') . ' ASC');

    $db->setQuery($query, 0, $limit);

    // Normalise a null/empty result to an array so the loop and count() are safe.
    $articles = $db->loadObjectList() ?: [];

    $created         = 0;
    $skipped         = 0;
    $lastProcessedId = $lastId;
    $now             = Factory::getDate()->toSql();

    foreach ($articles as $article) {
        // Advance the cursor before attempting the insert so a failing row
        // still moves the batch forward.
        $lastProcessedId = (int) $article->id;

        $record = (object) [
            'content_type'     => 'com_content',
            'content_id'       => (int) $article->id,
            'og_title'         => $article->title,
            'og_description'   => $this->extractDescription($article),
            'og_image'         => $this->extractImage($article),
            'og_type'          => 'article',
            'seo_title'        => '',
            'meta_description' => $article->metadesc ?: '',
            'robots'           => '',
            'canonical_url'    => '',
            'language'         => '*',
            'published'        => 1,
            'created'          => $now,
            'modified'         => $now,
        ];

        try {
            $db->insertObject('#__mokoog_tags', $record);
            $created++;
        } catch (\RuntimeException $e) {
            // Best effort: record the failure and carry on with the next article.
            $skipped++;
            \Joomla\CMS\Log\Log::add('Batch insert failed for article ' . $article->id . ': ' . $e->getMessage(), \Joomla\CMS\Log\Log::WARNING, 'mokoog');
        }
    }

    echo new JsonResponse([
        'created'  => $created,
        'skipped'  => $skipped,
        'examined' => \count($articles),
        'last_id'  => $lastProcessedId,
    ]);
    $app->close();
}
/**
 * Derive a plain-text description for an article.
 *
 * The article's meta description is returned unchanged when it is set.
 * Otherwise the intro text (or, failing that, the full text) is reduced to
 * plain text: tags are stripped, HTML entities are decoded, whitespace is
 * collapsed, and the result is cut to at most 160 characters (157 plus "...").
 *
 * @param object $article Article record; reads metadesc, introtext and fulltext
 *
 * @return string Plain-text description, or an empty string when no text exists
 */
private function extractDescription(object $article): string
{
    // Prefer the author-supplied meta description when there is one.
    if (!empty($article->metadesc)) {
        return $article->metadesc;
    }

    // Fall back to intro text, then full text; cast so null never reaches the
    // string functions below (passing null is deprecated since PHP 8.1).
    $html = (string) (($article->introtext ?? '') ?: ($article->fulltext ?? ''));

    if ($html === '') {
        return '';
    }

    // strip_tags() removes markup without leaving a gap, so "<p>One.</p><p>Two.</p>"
    // would become "One.Two.". Add a space after block-level closers and <br>.
    $html = preg_replace(
        '#</(?:p|div|h[1-6]|li|tr|td|th|blockquote)\s*>|<br\s*/?>#i',
        '$0 ',
        $html
    ) ?? $html;

    // Decode entities AFTER stripping tags so escaped markup such as "&lt;b&gt;"
    // is kept as literal text rather than being turned into a tag and removed.
    $text = html_entity_decode(strip_tags($html), ENT_QUOTES | ENT_HTML5, 'UTF-8');

    // Collapse whitespace, including the non-breaking spaces produced by &nbsp;.
    $collapsed = preg_replace('/[\s\x{00A0}]+/u', ' ', $text);

    if ($collapsed === null) {
        // The /u pattern fails on invalid UTF-8; fall back to byte-wise matching.
        $collapsed = (string) preg_replace('/\s+/', ' ', $text);
    }

    $text = trim($collapsed);

    if (mb_strlen($text) > 160) {
        // rtrim avoids a dangling space before the ellipsis ("word ...").
        $text = rtrim(mb_substr($text, 0, 157)) . '...';
    }

    return $text;
}
/**
 * Pick the best available image for an article.
 *
 * Decodes the article's `images` JSON and returns the full-text image when
 * set, otherwise the intro image, otherwise an empty string. Malformed data
 * (non-string column, invalid JSON, non-string values) yields an empty string
 * instead of raising a TypeError.
 *
 * NOTE(review): the value is returned exactly as stored. Joomla 4+ media
 * fields may append a "#joomlaImage://..." fragment to the path; confirm the
 * code that renders og:image cleans it.
 *
 * @param object $article Article record; reads the images property
 *
 * @return string Stored image path/URL, or an empty string when none is usable
 */
private function extractImage(object $article): string
{
    if (empty($article->images) || !is_string($article->images)) {
        return '';
    }

    $images = json_decode($article->images, true);

    // Invalid JSON decodes to null; scalar JSON documents are not usable either.
    if (!is_array($images)) {
        return '';
    }

    // Keys in order of preference: full-text image first, then intro image.
    foreach (['image_fulltext', 'image_intro'] as $key) {
        $candidate = $images[$key] ?? null;

        // Require a string so the declared return type can never be violated.
        if (is_string($candidate) && !empty($candidate)) {
            return $candidate;
        }
    }

    return '';
}
}