<?php

namespace boru\ocr;

use boru\ocr\Agent\AgentOptions;
use boru\ocr\Agent\AiOcrOrchestrator;
use boru\ocr\Pipeline\OcrPipeline;
use boru\ocr\Pipeline\OcrPipelineResultBundle;
use boru\ocr\Pipeline\OcrResult;
use boru\ocr\Format\FrameFormatter;

/**
 * Fluent entry point for running OCR on a single source file.
 *
 * Collects configuration through the with*() setters, then run() executes
 * the OcrPipeline and, when AI is enabled, passes the pipeline result through
 * AiOcrOrchestrator to produce the final text.
 */
class OcrEngine
{
    /** @var string Path of the file handed to the pipeline. */
    protected $sourceFile;

    /** @var string|null Image directory passed to the pipeline; a temp directory is created in run() when unset. */
    protected $imageDir;

    /** @var array Options forwarded to the pipeline as 'tesseractOptions'. */
    protected $tesseractOptions = [
        'lang' => 'eng',
        // good defaults; adjust as needed
        'config' => [
            'preserve_interword_spaces' => '1',
        ],
    ];

    /** @var array Options forwarded to the pipeline as 'providerOptions'. */
    protected $providerOptions = [];

    /** @var array Options forwarded to the pipeline as 'layoutOptions'. */
    protected $layoutOptions = [
        'minConf' => 0,
        'multiColumn' => true,
    ];

    /** @var callable|null Logger shared with the pipeline, the orchestrator and default agent options. */
    protected $logger;

    /** @var string Name or path of the tesseract executable. */
    protected $tesseractBinary = 'tesseract';

    /** @var bool Whether run() invokes the AI orchestrator after the pipeline. */
    protected $useAI = false;

    /**
     * @var bool Forwarded to the orchestrator as 'usePlanner'.
     *           The property name is misspelled ("user" instead of "use"); it is
     *           kept as-is because it is protected and subclasses may reference it.
     */
    protected $userPlanner = false;

    /** @var AgentOptions|null Custom planner agent options; defaults are built per run when null. */
    protected $plannerAgentOptions = null;

    /** @var AgentOptions|null Custom final agent options; defaults are built per run when null. */
    protected $finalAgentOptions = null;

    /**
     * @param string $sourceFile Path of the file to process.
     */
    public function __construct($sourceFile)
    {
        $this->sourceFile = (string)$sourceFile;
    }

    /**
     * Named constructor.
     *
     * @param string $sourceFile Path of the file to process.
     * @return self
     */
    public static function forFile($sourceFile)
    {
        return new self($sourceFile);
    }

    /**
     * Set the image directory passed to the pipeline.
     *
     * @param string $dir
     * @return $this
     */
    public function withImageDir($dir)
    {
        $this->imageDir = (string)$dir;
        return $this;
    }

    /**
     * Replace the tesseract options.
     *
     * The given array replaces the defaults entirely (it is not merged), so
     * include every key that should remain in effect.
     *
     * @param array $opts
     * @return $this
     */
    public function withTesseractOptions(array $opts)
    {
        $this->tesseractOptions = $opts;
        return $this;
    }

    /**
     * Replace the provider options.
     *
     * @param array $opts
     * @return $this
     */
    public function withProviderOptions(array $opts)
    {
        $this->providerOptions = $opts;
        return $this;
    }

    /**
     * Replace the layout options.
     *
     * The given array replaces the defaults entirely (it is not merged), so
     * include every key that should remain in effect.
     *
     * @param array $opts
     * @return $this
     */
    public function withLayoutOptions(array $opts)
    {
        $this->layoutOptions = $opts;
        return $this;
    }

    /**
     * @param callable $logger function(string $msg):void
     * @return $this
     */
    public function withLogger($logger)
    {
        $this->logger = $logger;
        return $this;
    }

    /**
     * @param string $binary Name or path of the tesseract executable.
     * @return $this
     */
    public function withTesseractBinary($binary)
    {
        $this->tesseractBinary = (string)$binary;
        return $this;
    }

    /**
     * Enable or disable AI; optionally set custom agent options.
     *
     * Passing an AgentOptions instance enables AI and uses those options for
     * the final agent. Any other value is interpreted as a boolean flag.
     *
     * @param AgentOptions|bool $useAI
     * @return $this
     */
    public function withAI($useAI = true)
    {
        if ($useAI instanceof AgentOptions) {
            $this->finalAgentOptions = $useAI;
            $this->useAI = true;
            return $this;
        }
        // Normalise to a real boolean so the flag always matches its documented type.
        $this->useAI = (bool)$useAI;
        return $this;
    }

    /**
     * Enable or disable the AI planner; optionally set custom agent options.
     *
     * Passing an AgentOptions instance enables the planner and uses those
     * options for the planner agent. Any other value is interpreted as a
     * boolean flag.
     *
     * @param AgentOptions|bool $usePlanner
     * @return $this
     */
    public function withPlanner($usePlanner = true)
    {
        if ($usePlanner instanceof AgentOptions) {
            $this->plannerAgentOptions = $usePlanner;
            $this->userPlanner = true;
            return $this;
        }
        // Normalise to a real boolean so the flag always matches its documented type.
        $this->userPlanner = (bool)$usePlanner;
        return $this;
    }

    /**
     * Run the OCR pipeline and, when AI is enabled, the AI orchestrator.
     *
     * When no image directory was configured, a fresh directory is created
     * under the system temp directory and remembered for later runs.
     *
     * Returns an OcrEngineResult wrapper with conveniences for callers.
     *
     * @return OcrEngineResult
     * @throws \RuntimeException When the temporary image directory cannot be created.
     * @throws \Exception
     */
    public function run()
    {
        if (!$this->imageDir) {
            $dir = sys_get_temp_dir() . '/ocr_' . uniqid();
            // The warning is suppressed because failure is reported as an exception.
            // The trailing is_dir() tolerates another process creating the directory
            // between the first check and mkdir().
            if (!is_dir($dir) && !@mkdir($dir, 0777, true) && !is_dir($dir)) {
                throw new \RuntimeException('Unable to create OCR image directory: ' . $dir);
            }
            $this->imageDir = $dir;
        }

        $pipeline = new OcrPipeline([
            'logger' => $this->logger,
            'tesseractBinary' => $this->tesseractBinary,
            'tesseractOptions' => $this->tesseractOptions,
            'providerOptions' => $this->providerOptions,
            'layoutOptions' => $this->layoutOptions,
            'imageDir' => $this->imageDir,
        ]);

        /** @var OcrPipelineResultBundle $bundle */
        $bundle = $pipeline->run($this->sourceFile);
        $result = new OcrEngineResult($bundle->result, $bundle->evidence);

        if ($this->useAI) {
            $orc = new AiOcrOrchestrator([
                "usePlanner" => $this->userPlanner,
            ], $this->logger);

            // Resolve defaults into locals instead of storing them on the instance:
            // default options capture the current logger, so caching them would keep
            // a stale logger if withLogger() is called before a later run().
            $finalOptions = $this->finalAgentOptions ?: $this->defaultAgentOptions();
            $plannerOptions = $this->plannerAgentOptions ?: $this->defaultAgentOptions();

            $final = $orc->run($this->sourceFile, $result, $plannerOptions, $finalOptions);
            $result->setFinalText($final);
        }

        return $result;
    }

    /**
     * Build the agent options used when the caller supplied none.
     *
     * @return AgentOptions Options carrying the current logger and the "gpt-4.1" model.
     */
    private function defaultAgentOptions()
    {
        return new AgentOptions([
            'logger' => $this->logger,
            "model" => "gpt-4.1",
        ]);
    }
}
