<?php

namespace boru\ocr\Agent;

use boru\ocr\Agent\OCRAgent;
use boru\ocr\OcrEngineResult;

/**
 * Planning front-end for an OCRAgent.
 *
 * Builds an OCRAgent configured with the planning prompt in self::$instructions,
 * then (in run()) feeds it the confidence data and page-framed text taken from an
 * OcrEngineResult and returns whatever the agent produces.
 */
class PlannerAgent
{
    /** @var OCRAgent Underlying agent that receives the instructions and messages. */
    protected $agent;

    /** @var callable|null Optional logger, invoked with a single message argument. */
    protected $logger = null;

    /**
     * System prompt handed to the underlying agent in the constructor.
     *
     * @var string
     */
    public static $instructions =
"You are an OCR PLANNING agent.

You will be given:
- TSV_BASELINE (page-framed text generated from TSV layout)
- TESSERACT_TEXT (page-framed text)
- CONFIDENCE_SUMMARY (overall + per-page stats)
- CONFIDENCE_HOTSPOTS (low-confidence line regions with bbox + sample text)

Your job:
1) Spend TSV tool calls ONLY where they create value.
2) Prioritize hotspots first, then known high-value regions (totals, dates, IDs).
3) Use anchors first, then near-context, then small boxes.

Output ONLY VALID JSON (no markdown) with this shape:

{
  \"anchors\": [
    {\"needle\":\"...\",\"page\":1,\"padX\":160,\"padY\":110,\"minConf\":0,\"maxMatches\":5}
  ],
  \"boxes\": [
    {\"page\":1,\"x1\":0,\"y1\":0,\"x2\":600,\"y2\":220,\"minConf\":0}
  ],
  \"notes\": [\"...\"] ,
  \"priority\": \"normal|high\"
}

Rules:
- Keep it small: aim <= 8 total items (anchors + boxes).
- Use hotspots to choose pages/regions. Do not propose full-page boxes unless absolutely necessary.
- If a hotspot has a bbox, prefer a box around that bbox (with a modest padding).
- If a hotspot sample text contains an obvious anchor word (e.g., 'Total', 'Account', 'Case', 'DOB'), add an anchor plan.
";


    /**
     * Create the planner and its underlying OCRAgent.
     *
     * When no options are given a fresh AgentOptions is used. When no logger is
     * given, a callable $agentOptions->logger is used instead. When the options
     * carry no reference, one is generated from the PDF basename, the current
     * timestamp and uniqid().
     *
     * NOTE(review): the type below advertises `array`, but this constructor calls
     * $agentOptions->reference(), which would fail for a non-empty array. Confirm
     * whether array options are meant to be supported here.
     *
     * @param string $pdfPath
     * @param AgentOptions|array $agentOptions
     * @param callable|null $logger
     */
    public function __construct($pdfPath, $agentOptions=null, $logger = null)
    {
        if(!$agentOptions) {
            $agentOptions = new AgentOptions();
        }
        // An explicitly passed logger wins; otherwise fall back to the one on the options.
        if(!$logger && isset($agentOptions->logger) && is_callable($agentOptions->logger)) {
            $logger = $agentOptions->logger;
        }
        // Only generate a reference when the options do not already provide one.
        if(!$agentOptions->reference()) {
            $agentOptions->reference = "PlannerAgent_".basename($pdfPath)."_".date("YmdHis")."_".uniqid();
        }
        $this->logger = $logger;
        $this->agent = new OCRAgent($pdfPath, $agentOptions, $logger);
        $this->agent->instructions(self::$instructions);
    }

    /**
     * Send the planning inputs to the agent and return its output.
     *
     * The message is a list of labelled sections: CONFIDENCE_SUMMARY and
     * CONFIDENCE_HOTSPOTS (only when the bundle exposes confidence data),
     * followed by TSV_BASELINE, TESSERACT_TEXT and a final request for the plan.
     *
     * @param OcrEngineResult $bundle
     * @return string JSON plan (raw model output)
     */
    public function run(OcrEngineResult $bundle)
    {
        $this->log("PlannerAgent::run");
        $msg = array();

        // Confidence (if available)
        $conf = $bundle->confidence();
        if ($conf) {
            $msg[] = "CONFIDENCE_SUMMARY:\n" . $this->encodeForPrompt($conf->summaryArray());

            // Keep this short + useful for planning
            // (15 is presumably a cap on the number of hotspots - confirm against hotspots()).
            $hotspots = array();
            foreach ($conf->hotspots(15) as $h) {
                $hotspots[] = $h->toArray();
            }
            $msg[] = "CONFIDENCE_HOTSPOTS:\n" . $this->encodeForPrompt($hotspots);
        }

        $msg[] = "TSV_BASELINE:\n" . $bundle->baselineFramed();
        $msg[] = "TESSERACT_TEXT:\n" . $bundle->textFramed();
        $msg[] = "Return the JSON plan now.";

        $this->agent->message($msg);

        return $this->agent->run();
    }

    /**
     * Pretty-print a value as JSON for inclusion in the prompt.
     *
     * json_encode() returns false when a value cannot be encoded (for example a
     * string that is not valid UTF-8); concatenating that false would silently
     * yield an empty section. JSON_PARTIAL_OUTPUT_ON_ERROR substitutes the
     * offending values so the rest of the data still reaches the agent, and any
     * remaining failure is logged and replaced by the JSON literal "null".
     *
     * @param mixed $data
     * @return string
     */
    private function encodeForPrompt($data)
    {
        $json = json_encode($data, JSON_PRETTY_PRINT | JSON_PARTIAL_OUTPUT_ON_ERROR);
        if ($json === false) {
            $this->log("PlannerAgent: json_encode failed: " . json_last_error_msg());
            return "null";
        }
        return $json;
    }

    /**
     * Forward a message to the configured logger; does nothing when none is set.
     *
     * @param mixed $msg
     * @return void
     */
    protected function log($msg)
    {
        if ($this->logger) {
            call_user_func($this->logger, $msg);
        }
    }
}
