<?php

namespace boru\ocr;

use boru\ocr\Agent\AiOptions;
use boru\ocr\Agent\AgentOptions;
use boru\ocr\Agent\AiOcrOrchestrator;
use boru\ocr\Layout\LayoutOptions;
use boru\ocr\OcrEngineResult;
use boru\ocr\Pipeline\OcrPipeline;
use boru\ocr\Pipeline\OcrPipelineResultBundle;
use boru\ocr\Traits\OcrLogTrait;

/**
 * Fluent entry point for running OCR on a single source file.
 *
 * Collects configuration through the with*() methods, then run() executes an
 * OcrPipeline and, when AI is enabled, passes the result through an
 * AiOcrOrchestrator to produce the final text.
 */
class OcrEngine
{
    use OcrLogTrait;

    /** @var string Path of the file handed to the pipeline. */
    protected $sourceFile;

    /** @var string|null Image directory passed to the pipeline; a temp dir is generated in run() when unset. */
    protected $imageDir;

    /** @var array Options forwarded to the pipeline as 'tesseractOptions'. */
    protected $tesseractOptions = [
        'lang' => 'eng',
        // good defaults; adjust as needed
        'config' => [
            'preserve_interword_spaces' => '1',
        ],
    ];

    /** @var array Options forwarded to the pipeline as 'providerOptions'. */
    protected $providerOptions = [];

    /** @var LayoutOptions|null Options forwarded to the pipeline as 'layoutOptions'. */
    protected $layoutOptions;

    /** @var OCRLogger|null Created lazily in run() when none was supplied. */
    protected $logger;

    /** @var string Forwarded to the pipeline as 'tesseractBinary'. */
    protected $tesseractBinary = 'tesseract';

    /** @var bool Whether run() performs the AI step after the pipeline. */
    protected $useAI = false;

    /**
     * Planner on/off flag, forwarded to AiOptions as "usePlanner".
     * NOTE: the property is spelled "userPlanner"; the name is kept so existing
     * subclasses that reference it continue to work.
     *
     * @var bool
     */
    protected $userPlanner = false;

    /** @var bool Table-interpreter on/off flag, forwarded to AiOptions as "useTableInterpreter". */
    protected $useTableInterpreter = false;

    /** @var AgentOptions|null */
    protected $plannerAgentOptions = null;

    /** @var AgentOptions|null */
    protected $tableInterpreterOptions = null;

    /** @var AgentOptions|null */
    protected $finalAgentOptions = null;

    /** @var AiOptions|null When set, run() uses it as-is instead of building one from the flags above. */
    protected $aiOptions = null;

    /**
     * @param string $sourceFile
     */
    public function __construct($sourceFile)
    {
        $this->sourceFile = (string)$sourceFile;
    }

    /**
     * Named constructor.
     *
     * @param string $sourceFile
     * @return self
     */
    public static function forFile($sourceFile)
    {
        return new self($sourceFile);
    }

    /**
     * Set the image directory handed to the pipeline.
     *
     * @param string $dir
     * @return $this
     */
    public function withImageDir($dir)
    {
        $this->imageDir = (string)$dir;
        return $this;
    }

    /**
     * Replace the tesseract options. The given array fully replaces the
     * defaults; it is not merged with them.
     *
     * @param array $opts
     * @return $this
     */
    public function withTesseractOptions(array $opts)
    {
        $this->tesseractOptions = $opts;
        return $this;
    }

    /**
     * Replace the provider options.
     *
     * @param array $opts
     * @return $this
     */
    public function withProviderOptions(array $opts)
    {
        $this->providerOptions = $opts;
        return $this;
    }

    /**
     * Set layout options from a LayoutOptions instance or from an array
     * (converted with LayoutOptions::create()). Any other value is ignored
     * and the current layout options are left unchanged.
     *
     * @param array|LayoutOptions $opts
     * @return $this
     */
    public function withLayoutOptions($opts)
    {
        if ($opts instanceof LayoutOptions) {
            $this->layoutOptions = $opts;
        } elseif (is_array($opts)) {
            $this->layoutOptions = LayoutOptions::create($opts);
        }
        return $this;
    }

    /**
     * @param OCRLogger $logger
     * @return $this
     */
    public function withLogger(OCRLogger $logger)
    {
        $this->logger = $logger;
        return $this;
    }

    /**
     * @param string $binary
     * @return $this
     */
    public function withTesseractBinary($binary)
    {
        $this->tesseractBinary = (string)$binary;
        return $this;
    }

    /**
     * Enable or disable the AI step, optionally supplying its configuration.
     *
     * - AgentOptions: stored as the final agent options; AI is enabled.
     * - AiOptions:    stored as the AI options; AI is enabled.
     * - array:        converted with AiOptions::fromArray(); AI is enabled.
     * - anything else is treated as an on/off flag.
     *
     * Note: once AI options are supplied here (AiOptions or array), run() uses
     * them as-is and does not apply the settings made through withPlanner()
     * or withTableInterpreter().
     *
     * @param AgentOptions|AiOptions|array|bool $useAI
     * @return $this
     */
    public function withAI($useAI = true)
    {
        if ($useAI instanceof AgentOptions) {
            $this->finalAgentOptions = $useAI;
            $this->useAI = true;
            return $this;
        }
        if ($useAI instanceof AiOptions) {
            $this->aiOptions = $useAI;
            $this->useAI = true;
            return $this;
        }
        if (is_array($useAI)) {
            $this->aiOptions = AiOptions::fromArray($useAI);
            $this->useAI = true;
            return $this;
        }
        // Normalise to a real boolean so the flag always matches its documented type.
        $this->useAI = (bool)$useAI;
        return $this;
    }

    /**
     * Enable or disable the AI planner, optionally supplying custom agent
     * options (which also enables it).
     *
     * @param AgentOptions|bool $usePlanner
     * @return $this
     */
    public function withPlanner($usePlanner = true)
    {
        if ($usePlanner instanceof AgentOptions) {
            $this->plannerAgentOptions = $usePlanner;
            $this->userPlanner = true;
            return $this;
        }
        $this->userPlanner = (bool)$usePlanner;
        return $this;
    }

    /**
     * Enable or disable the AI table interpreter, optionally supplying custom
     * agent options (which also enables it).
     *
     * @param AgentOptions|bool $useTableInterpreter
     * @return $this
     */
    public function withTableInterpreter($useTableInterpreter = true)
    {
        if ($useTableInterpreter instanceof AgentOptions) {
            $this->tableInterpreterOptions = $useTableInterpreter;
            $this->useTableInterpreter = true;
            return $this;
        }
        $this->useTableInterpreter = (bool)$useTableInterpreter;
        return $this;
    }

    /**
     * Run the OCR pipeline and, when AI is enabled, the AI post-processing step.
     *
     * The AI step is skipped when the pipeline reports the source type
     * (result metrics 'type') as 'spreadsheet' or 'word'.
     *
     * Returns an OcrEngineResult wrapper with conveniences for callers.
     *
     * @return OcrEngineResult
     * @throws \Exception
     */
    public function run()
    {
        if (!$this->logger) {
            $this->logger = new OCRLogger();
        }
        if (!$this->imageDir) {
            $this->imageDir = sys_get_temp_dir().'/ocr_'.uniqid();
            // mkdir() can fail because another process created the directory
            // first, so re-check is_dir() before treating it as a failure.
            // A real failure is reported rather than silently swallowed; it is
            // not thrown because sources that never need the directory must
            // keep working.
            if (!is_dir($this->imageDir)
                && !@mkdir($this->imageDir, 0777, true)
                && !is_dir($this->imageDir)
            ) {
                $this->logger->info("Unable to create image dir: {$this->imageDir}\n");
            }
        }
        $this->logger->info("Starting OCR pipeline for file: {$this->sourceFile}, using image dir: {$this->imageDir}\n");

        // isset() is already false for a null value, so this covers both
        // "missing" and "explicitly null".
        if (!isset($this->tesseractOptions["logger"])) {
            $this->tesseractOptions["logger"] = $this->logger;
        }

        $pipeline = new OcrPipeline(array(
            'logger' => $this->logger,
            'tesseractBinary' => $this->tesseractBinary,
            'tesseractOptions' => $this->tesseractOptions,
            'providerOptions' => $this->providerOptions,
            'layoutOptions' => $this->layoutOptions,
            'imageDir' => $this->imageDir,
        ));

        /** @var OcrPipelineResultBundle $bundle */
        $bundle = $pipeline->run($this->sourceFile);
        $result = new OcrEngineResult($bundle->result, $bundle->evidence);
        $result->setTesseractOptions($pipeline->getTesseractOptions());

        // Skip AI for certain direct-text source types.
        $sourceType = isset($bundle->result->metrics['type']) ? $bundle->result->metrics['type'] : null;
        $skipAiTypes = array('spreadsheet', 'word');
        $skipAi = in_array($sourceType, $skipAiTypes, true);

        if (!$this->useAI || $skipAi) {
            return $result;
        }

        // Fill in defaults for any agent options the caller did not provide.
        if (!$this->finalAgentOptions) {
            $this->finalAgentOptions = $this->defaultAgentOptions();
        }
        if (!$this->plannerAgentOptions) {
            $this->plannerAgentOptions = $this->defaultAgentOptions();
        }
        if (!$this->tableInterpreterOptions) {
            $this->tableInterpreterOptions = $this->defaultAgentOptions();
        }

        // Explicit AI options (set through withAI()) take precedence; otherwise
        // assemble them from the individual flags and agent options.
        if (!$this->aiOptions) {
            $this->aiOptions = AiOptions::fromArray([
                "usePlanner" => $this->userPlanner,
                "useTableInterpreter" => $this->useTableInterpreter,
                "tableInterpreterOptions" => $this->tableInterpreterOptions,
                "plannerAgentOptions" => $this->plannerAgentOptions,
                "finalAgentOptions" => $this->finalAgentOptions,
            ]);
        }

        $orc = new AiOcrOrchestrator($this->aiOptions, $this->logger);
        $final = $orc->run($this->sourceFile, $result);
        $result->setFinalText($final);

        return $result;
    }

    /**
     * Build the agent options used when the caller supplied none: the
     * engine's logger and the "gpt-4.1" model.
     *
     * @return AgentOptions
     */
    private function defaultAgentOptions()
    {
        return new AgentOptions([
            'logger' => $this->logger,
            "model" => "gpt-4.1",
        ]);
    }
}
