<?php

namespace boru\ocr\Tesseract;

use boru\ocr\OCRLogger;
use boru\ocr\Traits\OcrLogTrait;

/**
 * Builds and executes a tesseract command line (either a local binary or
 * `docker run ... tesseract`) and returns whatever the command writes to stdout.
 */
class TesseractCliRunner
{
    use OcrLogTrait;

    /** @var string Path or command name of the tesseract binary used in non-docker mode */
    protected $binary;

    /**
     * Logger taken from the TesseractOptions passed to run(), when one is set there.
     * Presumably consumed by the OcrLogTrait helpers (e.g. logDebug) - verify against the trait.
     *
     * @var OCRLogger|null
     */
    protected $logger = null;

    /**
     * @param string $binary Path or command name (e.g. 'tesseract')
     */
    public function __construct($binary = 'tesseract')
    {
        $this->binary = $binary;
    }

    /**
     * Runs tesseract and returns raw output (stdout).
     *
     * The command is assembled as: <launcher> <image> stdout <common args> [tsv],
     * where <launcher> is either the configured binary or a `docker run` invocation.
     * An empty string is returned when the command yields no output at all
     * (some images truly return nothing); the caller decides how to treat that.
     *
     * @param string $imagePath Path to an existing image file
     * @param TesseractOptions $opts
     * @param string $format 'txt' or 'tsv' (case-insensitive)
     * @return string
     * @throws \Exception When the image path does not exist, the format is invalid,
     *                    or docker mode is requested without a docker image.
     */
    public function run($imagePath, TesseractOptions $opts, $format)
    {
        if (!is_string($imagePath) || $imagePath === '' || !file_exists($imagePath)) {
            throw new \Exception("TesseractCliRunner: imagePath not found: " . (string)$imagePath);
        }

        // instanceof is already false for null, so no separate null check is needed.
        if ($opts->logger instanceof OCRLogger) {
            $this->logger = $opts->logger;
        }

        $format = strtolower((string)$format);
        if ($format !== 'txt' && $format !== 'tsv') {
            throw new \Exception("TesseractCliRunner: invalid format: " . (string)$format);
        }

        if ($opts->dockerMode) {
            $cmdParts = $this->buildDockerCommand($imagePath, $opts);
        } else {
            // Classic mode: call the tesseract binary directly.
            $cmdParts = array(
                escapeshellcmd($this->binary),
                escapeshellarg($imagePath),
                'stdout',
            );
        }

        // Common args (lang, psm, oem, config)
        foreach ($this->buildArgs($opts) as $arg) {
            $cmdParts[] = $arg;
        }

        // 'tsv' is a tesseract config name and must come after the other arguments.
        if ($format === 'tsv') {
            $cmdParts[] = 'tsv';
        }

        $cmd = implode(' ', $cmdParts);

        if ($opts->quiet) {
            // Discard stderr; cmd.exe has no /dev/null, its null device is NUL.
            $cmd .= (DIRECTORY_SEPARATOR === '\\') ? ' 2>NUL' : ' 2>/dev/null';
        }

        $this->logDebug("TesseractCliRunner: running command: " . $cmd);
        $out = shell_exec($cmd);

        // shell_exec() yields a non-string when there is no output or the command
        // could not be run; both cases are reported as an empty string.
        return is_string($out) ? $out : '';
    }

    /**
     * Builds the leading part of the command for docker mode:
     * docker run [dockerOptions] -v <hostDir>:<mountTarget> <image> tesseract <mountTarget>/<basename> stdout
     *
     * @param string $imagePath Host path of the image (already validated by run())
     * @param TesseractOptions $opts
     * @return array Command tokens (already escaped where needed)
     * @throws \Exception When no docker image is configured
     */
    private function buildDockerCommand($imagePath, TesseractOptions $opts)
    {
        $dockerImage = (string)$opts->dockerImage;
        if (trim($dockerImage) === '') {
            // Without an image the command would be `docker run ... '' tesseract ...`,
            // which can only fail - and fails silently when stderr is discarded.
            throw new \Exception("TesseractCliRunner: dockerImage is required in docker mode");
        }

        // Resolve the real path so the host directory can be mounted into the container.
        $realImagePath = realpath($imagePath);
        if ($realImagePath === false) {
            // Fallback to original path if realpath fails, but still attempt to run
            $realImagePath = $imagePath;
        }

        $mountTarget = $opts->dockerMountTarget;
        if ($mountTarget === '' || $mountTarget === null) {
            $mountTarget = '/tmp';
        }

        $parts = array('docker run');

        // docker options are a raw string that may hold several flags; they are
        // appended verbatim (NOT escaped), so they must come from trusted configuration.
        if (is_string($opts->dockerOptions) && $opts->dockerOptions !== '') {
            $parts[] = $opts->dockerOptions;
        }

        // Mount the image's host directory; dir and target are escaped together as one token.
        $parts[] = '-v';
        $parts[] = escapeshellarg(dirname($realImagePath) . ':' . $mountTarget);

        $parts[] = escapeshellarg($dockerImage);

        // Inside the container the image is addressed through the mount target.
        $parts[] = 'tesseract';
        $parts[] = escapeshellarg(rtrim($mountTarget, '/') . '/' . basename($realImagePath));
        $parts[] = 'stdout';

        return $parts;
    }

    /**
     * Builds the arguments shared by both modes: -l, --psm, --oem and config entries.
     *
     * Config entries come in two shapes:
     *  - string key:  ['preserve_interword_spaces' => '1'] becomes `-c 'key=value'` (escaped);
     *    boolean values are written as 1 / 0.
     *  - integer key: ['-c', 'foo=bar'] raw tokens, appended verbatim (NOT escaped).
     *
     * @param TesseractOptions $opts
     * @return array Array of CLI args (already escaped where needed)
     */
    protected function buildArgs(TesseractOptions $opts)
    {
        $args = array();

        if ($opts->lang !== null && $opts->lang !== '') {
            $args[] = '-l';
            $args[] = escapeshellarg($opts->lang);
        }

        if ($opts->psm !== null) {
            $args[] = '--psm';
            $args[] = (string)intval($opts->psm);
        }

        if ($opts->oem !== null) {
            $args[] = '--oem';
            $args[] = (string)intval($opts->oem);
        }

        // Tolerate a missing / non-iterable config instead of raising a foreach warning.
        $config = $opts->config;
        if (!is_array($config) && !($config instanceof \Traversable)) {
            $config = array();
        }

        foreach ($config as $key => $value) {
            if (is_int($key)) {
                // raw tokens
                $args[] = (string)$value;
                continue;
            }

            if (is_bool($value)) {
                // PHP would stringify false as '' and emit `key=`; spell booleans out as 1 / 0.
                $value = $value ? '1' : '0';
            }

            $args[] = '-c';
            $args[] = escapeshellarg($key . '=' . $value);
        }

        return $args;
    }
}
