<?php
namespace boru\boruai\OCR\Tesseract;

use boru\boruai\OCR\Contract\PageImageProviderInterface;
use Imagick;
use ImagickException;

/**
 * Renders the pages of a PDF to PNG files with the `mutool` command line tool
 * and optionally splits large page bitmaps into overlapping tiles via Imagick.
 *
 * Supported $options keys (all optional):
 *  - tileDpi            (int)    render resolution passed to `mutool draw -r`, minimum 72
 *  - tilePixelThreshold (int)    tile a page when width*height exceeds this; 0 = never tile
 *  - tileCols           (int)    number of tile columns, minimum 1
 *  - tileRows           (int)    number of tile rows, minimum 1
 *  - tileOverlap        (int)    overlap in pixels added around each tile, minimum 0
 *  - mutoolBandHeight   (int)    value passed to `mutool draw -B`; 0 = option not passed
 *  - debugCopyPath      (string) when set, the last tile of each tiled page is
 *                                copied to this path (best effort); disabled by default
 */
class MuPdfPdfPageImageProvider implements PageImageProviderInterface
{
    private $pdfPath;
    private $imageDir;

    private $tileDpi = 600;
    private $tilePixelThreshold = 0; // 0 = never tile
    private $tileCols = 2;
    private $tileRows = 2;
    private $tileOverlap = 20;
    private $mutoolBandHeight = 0;

    /** @var string|null Optional destination for a debug copy of the last tile; null disables it. */
    private $debugCopyPath = null;

    /** @var array<int,string|array<int,array{path:string,offset_x:int,offset_y:int}>>|null */
    private $pagesCache = null;

    /**
     * @param string $pdfPath  Path of the PDF to render.
     * @param string $imageDir Directory that receives the rendered PNGs; created if missing.
     * @param array<string,mixed> $options See the class docblock for the recognised keys.
     */
    public function __construct(string $pdfPath, string $imageDir, array $options = [])
    {
        $this->pdfPath  = $pdfPath;
        $this->imageDir = rtrim($imageDir, '/');

        if (!is_dir($this->imageDir)) {
            // Best effort: a failure here surfaces later as a failed render.
            @mkdir($this->imageDir, 0777, true);
        }

        if (isset($options['tileDpi'])) {
            $this->tileDpi = max(72, (int)$options['tileDpi']);
        }
        if (isset($options['tilePixelThreshold'])) {
            $this->tilePixelThreshold = (int)$options['tilePixelThreshold'];
        }
        if (isset($options['tileCols'])) {
            $this->tileCols = max(1, (int)$options['tileCols']);
        }
        if (isset($options['tileRows'])) {
            $this->tileRows = max(1, (int)$options['tileRows']);
        }
        if (isset($options['tileOverlap'])) {
            $this->tileOverlap = max(0, (int)$options['tileOverlap']);
        }
        if (isset($options['mutoolBandHeight'])) {
            $this->mutoolBandHeight = max(0, (int)$options['mutoolBandHeight']);
        }
        if (isset($options['debugCopyPath']) && is_string($options['debugCopyPath']) && $options['debugCopyPath'] !== '') {
            $this->debugCopyPath = $options['debugCopyPath'];
        }
    }

    /**
     * Reports whether a `mutool` executable can be located through the shell.
     */
    public static function isAvailable(): bool
    {
        $out = @shell_exec('command -v mutool 2>/dev/null');
        return is_string($out) && trim($out) !== '';
    }

    /**
     * Render every page and return one entry per page, keyed by 0-based page index.
     *
     * An entry is either the PNG path of the page, the literal string
     * "[MUPDF RENDER FAILED]" when no output file was produced, or a list of
     * tiles when the page exceeded the pixel threshold. The result is cached
     * for the lifetime of the instance.
     *
     * @return array<int,string|array<int,array{path:string,offset_x:int,offset_y:int}>>
     * @throws ImagickException
     * @throws \RuntimeException When mutool is not available.
     */
    public function getPageImages(): array
    {
        if ($this->pagesCache !== null) {
            return $this->pagesCache;
        }

        if (!self::isAvailable()) {
            throw new \RuntimeException("mutool not found in PATH; cannot use MuPdfPdfPageImageProvider.");
        }

        // getPageCount() never returns less than 1, so no further clamping is needed.
        $pageCount = $this->getPageCount();

        $result = [];

        for ($page = 1; $page <= $pageCount; $page++) {
            $pngPath = $this->renderPageWithMutool($page);

            if (!file_exists($pngPath)) {
                $result[$page - 1] = "[MUPDF RENDER FAILED]";
                continue;
            }

            $size = @getimagesize($pngPath);
            if (!$size) {
                // Dimensions unknown: hand back the untiled file as-is.
                $result[$page - 1] = $pngPath;
                continue;
            }

            $width  = (int)$size[0];
            $height = (int)$size[1];
            $pixels = $width * $height;

            if ($this->tilePixelThreshold > 0 && $pixels > $this->tilePixelThreshold) {
                // Tile the big PNG using Imagick (bitmap only, no PDF rendering).
                $result[$page - 1] = $this->createTilesFromPng($pngPath, $page - 1, $width, $height);
            } else {
                $result[$page - 1] = $pngPath;
            }
        }

        $this->pagesCache = $result;
        return $result;
    }

    /**
     * Delete every PNG in the image directory, then try to remove the directory.
     */
    public function cleanup(): void
    {
        // glob() may return false on error; treat that as "nothing to delete".
        $files = glob($this->imageDir . '/*.png');
        if ($files === false) {
            $files = [];
        }

        foreach ($files as $file) {
            @unlink($file);
        }
        @rmdir($this->imageDir);
    }

    /**
     * Determine the page count from the output of `mutool info`.
     *
     * Falls back to 1 when the command yields no output or no page
     * information can be parsed from it.
     */
    private function getPageCount(): int
    {
        $cmd    = 'mutool info ' . escapeshellarg($this->pdfPath) . ' 2>/dev/null';
        $output = shell_exec($cmd);
        if (!is_string($output) || $output === '') {
            return 1;
        }

        if (preg_match('/Pages:\s*([0-9]+)/', $output, $m)) {
            return max(1, (int)$m[1]);
        }

        // Fallback: count lines that start with "Page <number>".
        if (preg_match_all('/^Page\s+([0-9]+)/mi', $output, $mm)) {
            $count = count($mm[1]);
            if ($count > 0) {
                return $count;
            }
        }

        return 1;
    }

    /**
     * Render a page to PNG at tileDpi using mutool draw.
     *
     * @param int $page 1-based page number.
     * @return string Expected output path; the caller checks whether the file exists.
     */
    private function renderPageWithMutool(int $page): string
    {
        $pattern = $this->imageDir . '/mupdf_page_%d.png';
        $outPath = $this->imageDir . '/mupdf_page_' . $page . '.png';
        // Remove any stale file so a failed render is not mistaken for success.
        @unlink($outPath);

        $cmdParts = [
            'mutool', 'draw',
            '-q',
            '-F', 'png',
            '-r', (string)$this->tileDpi,
        ];

        if ($this->mutoolBandHeight > 0) {
            $cmdParts[] = '-B';
            $cmdParts[] = (string)$this->mutoolBandHeight;
        }

        $cmdParts[] = '-o';
        // The output pattern contains the caller-supplied directory, so it must be
        // escaped like the PDF path (spaces / shell metacharacters in the path).
        $cmdParts[] = escapeshellarg($pattern);
        $cmdParts[] = escapeshellarg($this->pdfPath);
        $cmdParts[] = (string)$page;

        $cmd = implode(' ', $cmdParts) . ' 2>/dev/null';

        shell_exec($cmd);

        return $outPath;
    }

    /**
     * Crop an already rendered PNG into tiles (bitmap only, no PDF rendering).
     *
     * The page is divided into tileCols x tileRows cells; each tile starts
     * tileOverlap pixels before its cell (clamped to 0) and is clipped to the
     * image bounds. The reported offsets are the tile's top-left corner in
     * page pixel coordinates.
     *
     * @param string $pngPath
     * @param int    $pageIndex 0-based
     * @param int    $width
     * @param int    $height
     * @return array<int,array{path:string,offset_x:int,offset_y:int}>
     * @throws ImagickException
     */
    private function createTilesFromPng(string $pngPath, int $pageIndex, int $width, int $height): array
    {
        $tiles = [];

        $cols    = max(1, $this->tileCols);
        $rows    = max(1, $this->tileRows);
        $overlap = $this->tileOverlap;

        $tileWidth  = (int)ceil($width  / $cols);
        $tileHeight = (int)ceil($height / $rows);

        $im = new Imagick();

        try {
            $im->readImage($pngPath);
            $im->setImageFormat('png');

            for ($ty = 0; $ty < $rows; $ty++) {
                for ($tx = 0; $tx < $cols; $tx++) {
                    $x = max(0, $tx * $tileWidth - $overlap);
                    $y = max(0, $ty * $tileHeight - $overlap);

                    $wTile = min($tileWidth + $overlap * 2, $width  - $x);
                    $hTile = min($tileHeight + $overlap * 2, $height - $y);

                    if ($wTile <= 0 || $hTile <= 0) {
                        continue;
                    }

                    $tilePath = sprintf(
                        '%s/page_%d_tile_%d_%d.png',
                        $this->imageDir,
                        $pageIndex,
                        $tx,
                        $ty
                    );

                    $tile = clone $im;
                    try {
                        $tile->cropImage($wTile, $hTile, $x, $y);
                        // Reset the virtual canvas so the tile starts at (0,0).
                        $tile->setImagePage(0, 0, 0, 0);
                        $tile->setImageCompressionQuality(100);
                        $tile->writeImage($tilePath);
                    } finally {
                        // Release the tile bitmap even when Imagick throws.
                        $tile->clear();
                        $tile->destroy();
                    }

                    $tiles[] = [
                        'path'     => $tilePath,
                        'offset_x' => $x,
                        'offset_y' => $y,
                    ];
                }
            }
        } finally {
            // Release the full-page bitmap even when Imagick throws.
            $im->clear();
            $im->destroy();
        }

        // Optional, opt-in debugging aid; failures are deliberately ignored.
        if ($this->debugCopyPath !== null && !empty($tiles)) {
            @copy($tiles[count($tiles) - 1]['path'], $this->debugCopyPath);
        }

        return $tiles;
    }
}
