<?php
namespace App\EventSubscriber;
use Massive\Bundle\SearchBundle\Search\Field;
use Massive\Bundle\SearchBundle\Search\SearchManagerInterface;
use Massive\Bundle\SearchBundle\Search\Event\PreIndexEvent;
use Smalot\PdfParser\Parser;
use Sulu\Bundle\ArticleBundle\Document\ArticleDocument;
use Sulu\Bundle\MediaBundle\Api\Media;
use Sulu\Bundle\MediaBundle\Entity\MediaRepositoryInterface;
use Sulu\Bundle\MediaBundle\Media\Storage\StorageInterface;
use Sulu\Bundle\PageBundle\Document\PageDocument;
use Sulu\Component\DocumentManager\Event\PublishEvent;
use Sulu\Component\DocumentManager\Events;
use Sulu\Component\DocumentManager\Subscriber\EventSubscriberInterface;
/**
 * Collects searchable text when a page or article is published and attaches it
 * to the search document during the subsequent "pre index" event.
 *
 * On publish, the subscriber gathers:
 *  - every scalar (non-boolean) top-level structure property except "filter",
 *  - the "title" of every block in the "main_content" property,
 *  - the extracted text of every PDF referenced by a "file" block, together
 *    with a download link and title for that PDF.
 *
 * NOTE(review): onPreIndex() adds the collected values to whatever document is
 * being indexed; it does not verify that the indexed document is the one that
 * was just published. Confirm this is acceptable for the indexing flow in use.
 */
class PagePublishSubscriber implements EventSubscriberInterface
{
    /** Locale passed to the media API wrapper when reading title/name/extension. */
    private const MEDIA_LOCALE = 'fr';

    /** Structure property whose blocks are scanned for titles and PDF files. */
    private const BLOCKS_PROPERTY = 'main_content';

    private MediaRepositoryInterface $media;
    private StorageInterface $storage;
    private SearchManagerInterface $searchManager;

    /** Lower-cased, tag-free text collected during the last publish. */
    private string $indexedText = '';

    /** @var list<array{0: string, 1: mixed}> [download URL, title] pairs of the indexed PDFs */
    private array $filesUrl = [];

    public function __construct(
        MediaRepositoryInterface $media,
        StorageInterface $storage,
        SearchManagerInterface $searchManager
    ) {
        $this->media = $media;
        $this->storage = $storage;
        $this->searchManager = $searchManager;
    }

    /**
     * @return array<string, array{0: string, 1: int}> event name => [method, priority]
     */
    public static function getSubscribedEvents()
    {
        return [
            Events::PUBLISH => ['onPublish', 0],
            'massive_search.pre_index' => ['onPreIndex', -200],
        ];
    }

    /**
     * Rebuilds the collected text and PDF list from the published document.
     *
     * @throws \Exception when the PDF parser fails on a referenced file (propagated unchanged)
     */
    public function onPublish(PublishEvent $event): void
    {
        // Start from a clean slate: without this, text and file links from
        // earlier publishes handled by the same subscriber instance would be
        // appended to and indexed with the current document.
        $this->indexedText = '';
        $this->filesUrl = [];

        $document = $event->getDocument();
        if ($document instanceof PageDocument || $document instanceof ArticleDocument) {
            foreach ($document->getStructure()->toArray() as $key => $property) {
                if (is_array($property)) {
                    // Only the "main_content" array is inspected; other arrays are ignored.
                    if ($key === self::BLOCKS_PROPERTY) {
                        $this->collectBlocks($property);
                    }
                    continue;
                }

                // Booleans, nulls, non-scalar values and the "filter" property are not indexed.
                if ($key === 'filter' || !$this->isIndexableValue($property)) {
                    continue;
                }

                $this->indexedText .= $this->cleanerText($property) . ' ';
            }
        }

        // NOTE(review): hard-coded shell command, executed on every publish
        // regardless of document type. Kept as-is to preserve behavior.
        exec('rm -rf /var/www/html/var/cache/common/*');
    }

    /**
     * Adds the collected text and PDF links as fields of the search document.
     */
    public function onPreIndex(PreIndexEvent $event): void
    {
        $document = $event->getDocument();
        $document->addField(new Field('textContent', $this->indexedText, 'string'));
        $document->addField(new Field('textFiles', $this->filesUrl, 'array'));
    }

    /**
     * Indexes block titles and PDF attachments of the "main_content" blocks.
     *
     * @param array $blocks raw block list taken from the structure array
     */
    private function collectBlocks(array $blocks): void
    {
        foreach ($blocks as $block) {
            // Skip empty entries and anything that is not an associative block array.
            if (!is_array($block) || $block === []) {
                continue;
            }

            if (array_key_exists('title', $block) && $this->isIndexableValue($block['title'])) {
                $this->indexedText .= $this->cleanerText($block['title']) . ' ';
            }

            if (($block['type'] ?? null) === 'file') {
                $this->collectPdfFile($block['file']['id'] ?? null);
            }
        }
    }

    /**
     * Records the download link of a PDF media and appends its text content.
     *
     * Media that cannot be found, or that is not a PDF, is silently skipped.
     *
     * @param int|string|null $fileId media identifier taken from the block data
     */
    private function collectPdfFile($fileId): void
    {
        if ($fileId === null) {
            return;
        }

        $entity = $this->media->findMediaById($fileId);
        if ($entity === null) {
            // Guard against a dangling media reference instead of failing in the wrapper.
            return;
        }

        $media = new Media($entity, self::MEDIA_LOCALE);
        if (strtolower((string) $media->getExtension()) !== 'pdf') {
            return;
        }

        $this->filesUrl[] = [
            '/media/' . $fileId . '/download/' . $media->getName(),
            $media->getTitle(),
        ];

        $path = $this->storage->getPath($media->getStorageOptions());
        $pdf = (new Parser())->parseFile($path);
        $this->indexedText .= $this->decodePdfText($pdf->getText()) . ' ';
    }

    /**
     * Tells whether a structure value can be turned into indexable text.
     *
     * @param mixed $value
     */
    private function isIndexableValue($value): bool
    {
        return is_string($value) || is_int($value) || is_float($value);
    }

    /**
     * Strips markup, normalizes non-breaking spaces, lower-cases and trims a value.
     *
     * @param mixed $text value convertible to string
     */
    private function cleanerText($text): string
    {
        $plain = strip_tags((string) $text);
        // Non-breaking spaces (U+00A0) become regular spaces so words split consistently.
        $plain = str_replace("\u{00A0}", ' ', $plain);

        return trim(mb_strtolower($plain, 'UTF-8'));
    }

    /**
     * Normalizes text extracted from a PDF for indexing.
     *
     * The text is processed as UTF-8 throughout. The previous implementation
     * converted it to ISO-8859-1 first, which turned every character outside
     * Latin-1 into "?" and then rewrote all question marks as apostrophes.
     *
     * @param mixed $text raw text returned by the PDF parser
     */
    private function decodePdfText($text): string
    {
        $normalized = (string) $text;

        // Typographic apostrophes are indexed as plain ASCII apostrophes.
        $normalized = str_replace(["\u{2018}", "\u{2019}", "\u{02BC}"], "'", $normalized);
        // Tabs, newlines and non-breaking spaces collapse to a regular space.
        $normalized = str_replace(["\t", "\n", "\u{00A0}"], ' ', $normalized);
        // Decode any HTML entities present in the extracted text.
        $normalized = html_entity_decode($normalized, ENT_QUOTES | ENT_HTML401, 'UTF-8');

        return mb_strtolower($normalized, 'UTF-8');
    }
}