<?php

namespace boru\ocr;

use boru\ocr\Agent\AiOptions;
use boru\ocr\Pipeline\OcrResult;
use boru\ocr\Evidence\TsvEvidenceIndex;
use boru\ocr\Format\FrameFormatter;
use boru\ocr\Format\OutputNormalizer;
use boru\ocr\Tesseract\Tsv\TsvSerializer;

/**
 * Wrapper around an OcrResult and its TSV evidence index.
 *
 * Exposes the per-page OCR artifacts held by the result (baseline pages, plain
 * text pages, table candidates), a slot for a final text produced by a later
 * AI stage, and a helper that dumps everything to a directory for debugging.
 */
class OcrEngineResult
{
    /** @var OcrResult Raw pipeline result wrapped by this object. */
    protected $result;

    /** @var TsvEvidenceIndex Evidence index returned by evidence(). */
    protected $evidence;

    /** @var string|null Final text set via setFinalText()/runAi(); null until then. */
    protected $finalText = null;

    /** @var mixed Table interpreter output; only written out by saveDebugOutput(). */
    protected $tableInterpreterOutput = null;

    /** @var mixed Planner output; only written out by saveDebugOutput(). */
    protected $plannerOutput = null;

    /** @var mixed Tesseract options (array, or object exposing toArray()); dumped by saveDebugOutput(). */
    protected $tesseractOptions;

    /**
     * @param OcrResult $result
     * @param TsvEvidenceIndex $evidence
     */
    public function __construct(OcrResult $result, TsvEvidenceIndex $evidence)
    {
        $this->result = $result;
        $this->evidence = $evidence;
    }

    /**
     * Remember the Tesseract options so saveDebugOutput() can dump them.
     *
     * @param mixed $options Array, or object with a toArray() method.
     * @return void
     */
    public function setTesseractOptions($options)
    {
        $this->tesseractOptions = $options;
    }

    /**
     * Remember the planner output so saveDebugOutput() can dump it.
     *
     * @param mixed $output
     * @return void
     */
    public function setPlannerOutput($output)
    {
        $this->plannerOutput = $output;
    }

    /**
     * Remember the table interpreter output so saveDebugOutput() can dump it.
     *
     * @param mixed $output
     * @return void
     */
    public function setTableInterpreterOutput($output)
    {
        $this->tableInterpreterOutput = $output;
    }

    /**
     * Confidence value carried on the underlying result.
     *
     * @return mixed Whatever OcrResult::$confidence holds.
     */
    public function confidence()
    {
        return $this->result->confidence;
    }

    /**
     * Access the raw result DTO (all artifacts).
     *
     * @return OcrResult
     */
    public function raw()
    {
        return $this->result;
    }

    /**
     * Access evidence index for callbacks / targeted retrieval.
     *
     * @return TsvEvidenceIndex
     */
    public function evidence()
    {
        return $this->evidence;
    }

    /**
     * Baseline per-page text from TSV layout builder.
     *
     * @return array<int,string>
     */
    public function baselinePages()
    {
        return $this->result->baselinePages;
    }

    /**
     * Baseline framed as a single string (useful for feeding an agent).
     *
     * @return string
     */
    public function baselineFramed()
    {
        return FrameFormatter::framePages($this->result->baselinePages, 'TSV BASELINE OUTPUT');
    }

    /**
     * Plain text OCR pages.
     *
     * @return array<int,string>
     */
    public function textPages()
    {
        return $this->result->textPages;
    }

    /**
     * Plain text framed.
     *
     * @return string
     */
    public function textFramed()
    {
        return FrameFormatter::framePages($this->result->textPages, 'TESSERACT TEXT OUTPUT');
    }

    /**
     * Normalized output text.
     *
     * Uses the final text when one has been set (setFinalText()/runAi());
     * otherwise falls back to the framed baseline. The chosen text is passed
     * through OutputNormalizer before being returned.
     *
     * @return string
     */
    public function getText()
    {
        // Explicit null/'' check instead of a truthiness test: a final text of
        // "0" is falsy in PHP and would otherwise be replaced by the baseline.
        $hasFinal = $this->finalText !== null && $this->finalText !== '';
        $text = $hasFinal ? $this->finalText : $this->baselineFramed();

        return OutputNormalizer::normalize($text, array(
            'collapseSpaces' => true,
            'preserveIndent' => true,
            'collapseBlankLines' => true,
        ));
    }

    /**
     * Store the final text (typically set by the AI layer after post-processing).
     *
     * @param string $text Cast to string before being stored.
     * @return void
     */
    public function setFinalText($text)
    {
        $this->finalText = (string)$text;
    }

    /**
     * Run AI post-processing using TSV tools and store finalText.
     *
     * @param string $pdfPath
     * @param string $reference
     * @param array $options Passed to AiOptions::create().
     * @return string The orchestrator's result, also stored as the final text.
     */
    public function runAi($pdfPath, $reference = "", array $options = array())
    {
        $orch = new \boru\ocr\Agent\AiOcrOrchestrator(AiOptions::create($options));
        $final = $orch->run($pdfPath, $this, $reference);
        $this->setFinalText($final);
        return $final;
    }

    /**
     * Table candidates (page/region hints) framed for agents/debugging.
     *
     * @param int $maxCandidates Forwarded to FrameFormatter::frameTableCandidates().
     * @param mixed $title Forwarded to FrameFormatter::frameTableCandidates(); defaults to false.
     * @return string
     */
    public function tableCandidatesFramed($maxCandidates = 40, $title = false)
    {
        // Reuse tableCandidates() so both accessors apply the same guards.
        return FrameFormatter::frameTableCandidates($this->tableCandidates(), $maxCandidates, $title);
    }

    /**
     * Table candidates (page/region hints) as-is.
     *
     * Returns an empty array when the result carries no tableCandidates
     * property or when that property is not an array.
     *
     * @return array<int,object>
     */
    public function tableCandidates()
    {
        if ($this->result && isset($this->result->tableCandidates) && is_array($this->result->tableCandidates)) {
            return $this->result->tableCandidates;
        }
        return array();
    }

    /**
     * Dump the evidence index, final text, per-page text, framed baseline,
     * framed table candidates and (when set) planner output, table interpreter
     * output and Tesseract options into $dir.
     *
     * The directory is created recursively when missing. If it cannot be
     * created nothing is written and false is returned.
     *
     * @param string $dir Target directory.
     * @return bool True when the directory exists and every write succeeded.
     */
    public function saveDebugOutput($dir)
    {
        // Re-check is_dir() after a failed mkdir(): another process may have
        // created the directory between the first check and the mkdir() call.
        if (!is_dir($dir) && !mkdir($dir, 0777, true) && !is_dir($dir)) {
            return false;
        }

        $ok = true;

        $evidenceJson = TsvSerializer::toJson($this->evidence()->getPages());
        $ok = $this->writeDebugFile($dir . '/evidence_index.json', $evidenceJson) && $ok;

        $ok = $this->writeDebugFile($dir . '/final_text.txt', $this->getText()) && $ok;

        foreach ($this->textPages() as $pageNum => $pageText) {
            $ok = $this->writeDebugFile($dir . '/page_' . $pageNum . '.txt', $pageText) && $ok;
        }

        $ok = $this->writeDebugFile($dir . '/baseline_framed.txt', $this->baselineFramed()) && $ok;

        // NOTE(review): this file is named .json but receives the framed string
        // from tableCandidatesFramed(); confirm that format is what is intended.
        $ok = $this->writeDebugFile($dir . '/table_candidates.json', $this->tableCandidatesFramed(40, false)) && $ok;

        // Optionally save planner and table interpreter outputs if they exist
        if ($this->plannerOutput) {
            $ok = $this->writeDebugFile($dir . '/planner_output.txt', $this->plannerOutput) && $ok;
        }
        if ($this->tableInterpreterOutput) {
            $ok = $this->writeDebugFile($dir . '/table_interpreter_output.txt', $this->tableInterpreterOutput) && $ok;
        }
        if ($this->tesseractOptions) {
            $optionsToSave = array();
            if (is_object($this->tesseractOptions) && method_exists($this->tesseractOptions, 'toArray')) {
                $optionsToSave = $this->tesseractOptions->toArray();
            } elseif (is_array($this->tesseractOptions)) {
                $optionsToSave = $this->tesseractOptions;
            }
            // The logger entry is dropped before the options are serialized.
            if (isset($optionsToSave['logger'])) {
                unset($optionsToSave['logger']);
            }
            $optionsJson = json_encode($optionsToSave, JSON_PRETTY_PRINT);
            $ok = $this->writeDebugFile($dir . '/tesseract_options.json', $optionsJson) && $ok;
        }

        return $ok;
    }

    /**
     * Write one debug artifact and report whether the write succeeded.
     *
     * @param string $path Destination file.
     * @param mixed $contents Data for file_put_contents(); false (e.g. a failed json_encode()) counts as failure and is not written.
     * @return bool
     */
    private function writeDebugFile($path, $contents)
    {
        return $contents !== false && file_put_contents($path, $contents) !== false;
    }
}
