<?php

namespace boru\ocr;

use boru\ocr\Agent\AgentOptions;
use boru\ocr\Agent\AiOcrOrchestrator;

/**
 * High-level parent API.
 *
 * Usage:
 *   $result = OcrClient::create()
 *      ->tmpDir('/tmp/boru_ocr')
 *      ->tesseractOptions([...])
 *      ->layoutOptions([...])
 *      ->providerOptions([...])
 *      ->reference('abc')
 *      ->enableAi(true)
 *      ->enablePlanner(true)
 *      ->run($filePath);
 *
 *   echo $result->getText();
 */
class OcrClient
{
    /**
     * Directory handed to the engine as its image dir.
     * When null or '' at run time, a folder under the system temp dir is used.
     *
     * @var string|null
     */
    protected $tmpDir = null;

    /**
     * Optional logger, function(string $msg):void.
     *
     * @var callable|null
     */
    protected $logger = null;

    /** @var array Options forwarded to OcrEngine::withTesseractOptions() */
    protected $tesseractOptions = array();

    /** @var array Options forwarded to OcrEngine::withLayoutOptions() */
    protected $layoutOptions = array();

    /** @var array Options forwarded to OcrEngine::withProviderOptions() */
    protected $providerOptions = array();

    /**
     * Caller-supplied options for the planner agent.
     * Null means "build default options for each run".
     *
     * @var AgentOptions|null
     */
    protected $plannerAgentOptions = null;

    /**
     * Caller-supplied options for the final agent.
     * Null means "build default options for each run".
     *
     * @var AgentOptions|null
     */
    protected $finalAgentOptions = null;

    /** @var string Reference value copied into default agent options */
    protected $reference = "";

    /** @var bool Whether run() invokes the AI orchestrator after the deterministic pass */
    protected $enableAi = false;

    /** @var bool Passed to the orchestrator as its 'usePlanner' option */
    protected $enablePlanner = true;

    /** @var string Tesseract executable name or path */
    protected $tesseractBinary = 'tesseract';

    /**
     * Fluent factory.
     *
     * Uses late static binding so that a subclass calling ::create()
     * receives an instance of that subclass rather than of OcrClient.
     *
     * @return static
     */
    public static function create()
    {
        return new static();
    }

    /**
     * Creates a client pre-populated with the recommended defaults.
     */
    public function __construct()
    {
        $this->defaults();
    }

    /**
     * Set the directory used for intermediate images.
     *
     * @param string $dir
     * @return $this
     */
    public function tmpDir($dir)
    {
        $this->tmpDir = (string)$dir;
        return $this;
    }

    /**
     * Set the logger used by the engine, the orchestrator and default agent options.
     *
     * @param callable $logger function(string $msg):void
     * @return $this
     */
    public function logger($logger)
    {
        $this->logger = $logger;
        return $this;
    }

    /**
     * Replace (not merge) the Tesseract options.
     *
     * @param array $opts
     * @return $this
     */
    public function tesseractOptions(array $opts)
    {
        $this->tesseractOptions = $opts;
        return $this;
    }

    /**
     * Replace (not merge) the layout options.
     *
     * @param array $opts
     * @return $this
     */
    public function layoutOptions(array $opts)
    {
        $this->layoutOptions = $opts;
        return $this;
    }

    /**
     * Replace (not merge) the provider options.
     *
     * @param array $opts
     * @return $this
     */
    public function providerOptions(array $opts)
    {
        $this->providerOptions = $opts;
        return $this;
    }

    /**
     * Set the reference copied into default agent options.
     *
     * @param string $reference
     * @return $this
     */
    public function reference($reference)
    {
        $this->reference = (string)$reference;
        return $this;
    }

    /**
     * Toggle the AI step that runs after the deterministic OCR pass.
     *
     * @param bool $enabled
     * @return $this
     */
    public function enableAi($enabled)
    {
        $this->enableAi = (bool)$enabled;
        return $this;
    }

    /**
     * Toggle the orchestrator's 'usePlanner' option.
     *
     * @param bool $enabled
     * @return $this
     */
    public function enablePlanner($enabled)
    {
        $this->enablePlanner = (bool)$enabled;
        return $this;
    }

    /**
     * Set the Tesseract executable name or path.
     *
     * @param string $binary
     * @return $this
     */
    public function tesseractBinary($binary)
    {
        $this->tesseractBinary = (string)$binary;
        return $this;
    }

    /**
     * Supply explicit planner agent options, or null to use per-run defaults.
     *
     * @param AgentOptions|null $agentOptions
     * @return $this
     */
    public function plannerAgentOptions($agentOptions = null)
    {
        $this->plannerAgentOptions = $agentOptions;
        return $this;
    }

    /**
     * Supply explicit final agent options, or null to use per-run defaults.
     *
     * @param AgentOptions|null $agentOptions
     * @return $this
     */
    public function finalAgentOptions($agentOptions = null)
    {
        $this->finalAgentOptions = $agentOptions;
        return $this;
    }

    /**
     * Run OCR (deterministic, with optional AI enhancement).
     *
     * The engine is always run first. When AI is enabled, the orchestrator is
     * then run on the engine result and its output is stored on that result
     * via setFinalText().
     *
     * @param string $filePath
     * @return OcrEngineResult
     * @throws \Exception When $filePath is empty or does not exist.
     */
    public function run($filePath)
    {
        $filePath = (string)$filePath;
        if ($filePath === '' || !file_exists($filePath)) {
            throw new \Exception("OcrClient: file not found: " . $filePath);
        }

        $tmpDir = $this->resolveTmpDir();

        $engine = OcrEngine::forFile($filePath)
            ->withImageDir($tmpDir)
            ->withTesseractBinary($this->tesseractBinary)
            ->withProviderOptions($this->providerOptions)
            ->withTesseractOptions($this->tesseractOptions)
            ->withLayoutOptions($this->layoutOptions);

        if ($this->logger) {
            $engine->withLogger($this->logger);
        }

        // Deterministic run
        $bundle = $engine->run();

        // Optional AI
        if ($this->enableAi) {
            // Defaults are built into locals rather than stored on $this, so a
            // reused client picks up later reference()/logger() changes instead
            // of silently keeping the options created by its first run.
            $plannerOptions = $this->plannerAgentOptions !== null
                ? $this->plannerAgentOptions
                : $this->createDefaultAgentOptions();
            $finalOptions = $this->finalAgentOptions !== null
                ? $this->finalAgentOptions
                : $this->createDefaultAgentOptions();

            $orch = new AiOcrOrchestrator(array(
                'usePlanner' => $this->enablePlanner,
            ), $this->logger);
            $final = $orch->run($filePath, $bundle, $plannerOptions, $finalOptions);
            $bundle->setFinalText($final);
        }

        return $bundle;
    }

    /**
     * Apply recommended defaults for most OCR use cases.
     *
     * These defaults are tuned for:
     * - PDFs with mixed text/tables
     * - Images/screenshots
     * - Legal/financial documents
     * - AI post-processing compatibility
     *
     * Only settings that are still unset/empty are filled in, so values
     * configured before calling this method are preserved.
     *
     * @return $this
     */
    public function defaults()
    {
        // Temp dir (lazy default; caller can still override)
        if ($this->tmpDir === null) {
            $this->tmpDir = sys_get_temp_dir() . '/boru_ocr_images';
        }

        // Tesseract defaults
        if (empty($this->tesseractOptions)) {
            $this->tesseractOptions = array(
                'lang' => 'eng',
                'config' => array(
                    // Improves spacing fidelity for TSV + layout
                    'preserve_interword_spaces' => '1',
                ),
            );
        }

        // Layout defaults (TSV-driven)
        if (empty($this->layoutOptions)) {
            $this->layoutOptions = array(
                // Include all words; let AI decide relevance
                'minConf' => 0,

                // Enables multi-column detection in layout builder
                'multiColumn' => true,

                // Sensible reading-order heuristics
                'sort' => 'reading',
            );
        }

        // Provider defaults (PDF rendering / tiling)
        if (empty($this->providerOptions)) {
            $this->providerOptions = array(
                // Let factory decide VIPS → MuPDF → Imagick
                'dpi' => 300,

                // Enable tiling when provider supports it
                'tile' => true,

                // Reasonable tile size for OCR accuracy
                'tileWidth' => 2000,
                'tileHeight' => 2000,
            );
        }

        // Planner is useful by default when AI is enabled.
        // NOTE(review): with the declared property default (true) and the
        // bool cast in enablePlanner(), this branch can only trigger if a
        // subclass resets $enablePlanner to null.
        if ($this->enableAi && !isset($this->enablePlanner)) {
            $this->enablePlanner = true;
        }

        return $this;
    }

    /**
     * Determine the image directory for a run and try to make sure it exists.
     *
     * Directory creation stays best-effort: a failure never aborts the run,
     * but it is reported through the configured logger when one is callable.
     *
     * @return string
     */
    private function resolveTmpDir()
    {
        $tmpDir = $this->tmpDir;
        if ($tmpDir === null || $tmpDir === '') {
            $tmpDir = sys_get_temp_dir() . '/boru_ocr_images';
        }

        // is_dir() is checked again after mkdir() so that another process
        // creating the same directory concurrently is not treated as a failure.
        if (!is_dir($tmpDir) && !@mkdir($tmpDir, 0777, true) && !is_dir($tmpDir)) {
            if (is_callable($this->logger)) {
                call_user_func($this->logger, "OcrClient: could not create tmp dir: " . $tmpDir);
            }
        }

        return $tmpDir;
    }

    /**
     * Build agent options carrying the client's current reference and logger.
     *
     * @return AgentOptions
     */
    private function createDefaultAgentOptions()
    {
        $options = new AgentOptions();
        $options->reference($this->reference);
        $options->logger($this->logger);

        return $options;
    }

}
