src/EventSubscriber/PagePublishSubscriber.php line 95

Open in your IDE?
  1. <?php
  2. namespace App\EventSubscriber;
  3. use Massive\Bundle\SearchBundle\Search\Field;
  4. use Massive\Bundle\SearchBundle\Search\SearchManagerInterface;
  5. use Massive\Bundle\SearchBundle\Search\Event\PreIndexEvent;
  6. use Smalot\PdfParser\Parser;
  7. use Sulu\Bundle\ArticleBundle\Document\ArticleDocument;
  8. use Sulu\Bundle\MediaBundle\Api\Media;
  9. use Sulu\Bundle\MediaBundle\Entity\MediaRepositoryInterface;
  10. use Sulu\Bundle\MediaBundle\Media\Storage\StorageInterface;
  11. use Sulu\Bundle\PageBundle\Document\PageDocument;
  12. use Sulu\Component\DocumentManager\Event\PublishEvent;
  13. use Sulu\Component\DocumentManager\Events;
  14. use Sulu\Component\DocumentManager\Subscriber\EventSubscriberInterface;
  /**
   * Collects searchable text from a page or article when it is published and
   * adds it to the search document handed to the "massive_search.pre_index"
   * listener.
   *
   * Text is gathered from the scalar properties of the document structure, from
   * the "title" of every "main_content" block, and from the extracted text of
   * every PDF referenced by a "file" block. The collected values are kept on the
   * subscriber instance between onPublish() and onPreIndex().
   *
   * NOTE(review): this relies on onPublish() running before the pre-index
   * listener for the same document — confirm against the listener priorities of
   * the search integration.
   */
  class PagePublishSubscriber implements EventSubscriberInterface
  {
      /**
       * Locale handed to the media API wrapper when reading file title and name.
       * NOTE(review): hard-coded; the locale of the published document may be the
       * better choice — confirm with the site's locale setup.
       */
      private const MEDIA_LOCALE = 'fr';

      private MediaRepositoryInterface $media;
      private StorageInterface $storage;
      private SearchManagerInterface $searchManager;

      /** Lower-cased text collected during the most recent onPublish() call. */
      private string $indexedText = '';

      /**
       * PDF files collected during the most recent onPublish() call.
       *
       * @var list<array{0: string, 1: mixed}> pairs of [download URL, media title]
       */
      private array $filesUrl = [];

      public function __construct(
          MediaRepositoryInterface $media,
          StorageInterface $storage,
          SearchManagerInterface $searchManager
      ) {
          $this->media = $media;
          $this->storage = $storage;
          $this->searchManager = $searchManager;
      }

      /**
       * @return array<string, array{0: string, 1: int}> event name => [method, priority]
       */
      public static function getSubscribedEvents()
      {
          return [
              Events::PUBLISH => ['onPublish', 0],
              'massive_search.pre_index' => ['onPreIndex', -200],
          ];
      }

      /**
       * Gathers the searchable text and the PDF list of the published document.
       *
       * Only PageDocument and ArticleDocument instances contribute content; for
       * any other document the collected state is simply cleared.
       *
       * @throws \Exception presumably when a referenced PDF cannot be parsed —
       *                    parser failures are not caught here
       */
      public function onPublish(PublishEvent $event): void
      {
          // The subscriber instance outlives a single publish, so start from a
          // clean slate; otherwise content of previously published documents
          // would be appended to this one.
          $this->indexedText = '';
          $this->filesUrl = [];

          $document = $event->getDocument();
          if ($document instanceof PageDocument || $document instanceof ArticleDocument) {
              foreach ($document->getStructure()->toArray() as $key => $property) {
                  if (is_array($property)) {
                      // Of all array-valued properties only the content blocks are inspected.
                      if ($key === 'main_content') {
                          $this->collectBlocks($property);
                      }

                      continue;
                  }

                  // Booleans, empty values and the "filter" property carry no searchable text.
                  if (is_bool($property) || $property === null || $key === 'filter') {
                      continue;
                  }

                  $this->appendText($property);
              }
          }

          // NOTE(review): shells out to wipe a hard-coded cache directory, and does
          // so for every published document, not only pages and articles. Kept
          // unchanged because deployments may rely on it; consider the framework's
          // cache clearing facilities instead.
          exec('rm -rf /var/www/html/var/cache/common/*');
      }

      /**
       * Adds the text and files collected by onPublish() to the search document.
       */
      public function onPreIndex(PreIndexEvent $event): void
      {
          $document = $event->getDocument();
          $document->addField(new Field('textContent', $this->indexedText, 'string'));
          $document->addField(new Field('textFiles', $this->filesUrl, 'array'));
      }

      /**
       * Walks the "main_content" blocks, indexing block titles and PDF files.
       *
       * @param array $blocks raw block data as returned by the structure's toArray()
       */
      private function collectBlocks(array $blocks): void
      {
          foreach ($blocks as $block) {
              // Skip empty entries and anything that is not block data.
              if (!is_array($block) || $block === []) {
                  continue;
              }

              if (array_key_exists('title', $block)) {
                  $this->appendText($block['title']);
              }

              if (($block['type'] ?? null) !== 'file') {
                  continue;
              }

              // A file block without a selected file has no id to look up.
              $fileId = $block['file']['id'] ?? null;
              if ($fileId !== null) {
                  $this->indexPdf($fileId);
              }
          }
      }

      /**
       * Registers the media as a downloadable file and indexes its text if it is a PDF.
       *
       * @param int|string $fileId id of the media referenced by a file block
       */
      private function indexPdf($fileId): void
      {
          $entity = $this->media->findMediaById($fileId);
          if ($entity === null) {
              // The referenced media no longer exists; nothing to index.
              return;
          }

          $media = new Media($entity, self::MEDIA_LOCALE);
          if (strtolower((string) $media->getExtension()) !== 'pdf') {
              return;
          }

          $this->filesUrl[] = [
              '/media/' . $fileId . '/download/' . $media->getName(),
              $media->getTitle(),
          ];

          $path = $this->storage->getPath($media->getStorageOptions());
          $pdf = (new Parser())->parseFile($path);
          $this->indexedText .= $this->decodePdfText($pdf->getText()) . ' ';
      }

      /**
       * Appends a cleaned value, followed by a space, to the collected text.
       *
       * Values that cannot be converted to a string (arrays, null, objects
       * without __toString) are ignored.
       *
       * @param mixed $value
       */
      private function appendText($value): void
      {
          $stringable = is_scalar($value)
              || (is_object($value) && method_exists($value, '__toString'));

          if ($stringable) {
              $this->indexedText .= $this->cleanerText((string) $value) . ' ';
          }
      }

      /**
       * Strips markup from a property value and lower-cases it.
       *
       * @param mixed $text value convertible to string
       */
      private function cleanerText($text): string
      {
          $plain = strip_tags((string) $text);
          $plain = str_replace('&nbsp;', ' ', $plain);

          return trim(mb_strtolower($plain, 'UTF-8'));
      }

      /**
       * Normalises text extracted from a PDF for indexing.
       *
       * Typographic apostrophes become a plain "'", tabs, newlines and double
       * spaces become a single space, HTML entities are decoded and the result
       * is lower-cased. The text stays UTF-8 throughout, so characters outside
       * ISO-8859-1 and literal question marks are preserved.
       *
       * @param mixed $text value convertible to string
       */
      private function decodePdfText($text): string
      {
          $normalised = str_replace(["\u{2018}", "\u{2019}", "\u{02BC}"], "'", (string) $text);
          $normalised = str_replace(["\t", "\n", '  '], ' ', $normalised);
          $normalised = html_entity_decode($normalised, ENT_QUOTES | ENT_HTML5, 'UTF-8');

          return mb_strtolower($normalised, 'UTF-8');
      }
  }