<?php

namespace boru\ocr\Agent;

use boru\ocr\Agent\OCRAgent;
use boru\ocr\OcrEngineResult;

/**
 * TableInterpreterAgent:
 * - consumes TSV baseline + table candidate hints (pages/regions)
 * - uses TSV evidence tools sparingly to interpret and structure tables
 * - outputs JSON only (tables + confidence)
 *
 * PHP 5.6+
 */
class TableInterpreterAgent
{
    /** @var OCRAgent Wrapped agent that receives the instructions, tools and messages. */
    protected $agent;

    /** @var callable|mixed|null Logger as supplied by the caller; only invoked when callable. */
    protected $logger = null;

    /** @var string System prompt handed to the wrapped agent in the constructor. */
    public static $instructions =
"You are the TABLE INTERPRETATION agent.

Goal:
- Identify and extract table-like structures (including parts lists, BOMs, schedules, menus, legal payment tables).
- Use TABLE_CANDIDATES to focus. These are hints of where tables likely exist (page + optional bbox).
- Use TSV tools to read table cells precisely when needed.
- Output STRICT JSON ONLY. No prose.

What to output (JSON schema):
{
  \"tables\": [
    {
      \"page\": 1,
      \"source\": { \"kind\": \"region\"|\"page\", \"bbox\": {\"x\":0,\"y\":0,\"w\":0,\"h\":0} | null, \"region\": \"TITLE BLOCK\"|null },
      \"columns\": [\"...\"],
      \"rows\": [ { \"cells\": [\"...\",\"...\"] } ],
      \"confidence\": 0.0,
      \"notes\": [\"...\"]
    }
  ],
  \"notes\": [\"...\"]
}

Rules:
- Only emit a table if you are confident it's truly a table (confidence >= 0.55).
- Prefer extracting a smaller, correct table over a huge, noisy one.
- If no tables are found, output {\"tables\":[],\"notes\":[...]}.

Tool use:
- Use tsv_get_box_text when a TABLE_CANDIDATES bbox is present (tight box).
- Use tsv_find_text + tsv_get_near_match_text for anchors like 'Qty', 'Amount', 'Total', 'Part', 'Date', 'Description', 'Price'.

Now produce the JSON output.";

    /**
     * Build the wrapped OCRAgent and configure it with the table-interpretation prompt.
     *
     * @param string            $pdfPath      Path of the PDF; its basename is used in the generated reference.
     * @param array             $tools        Tools registered on the agent; skipped when empty.
     * @param AgentOptions|null $agentOptions Agent options; a fresh AgentOptions is created when falsy.
     * @param callable|null     $logger       Optional logger; falls back to a callable $agentOptions->logger.
     */
    public function __construct($pdfPath, array $tools = array(), $agentOptions = null, $logger = null)
    {
        if (!$agentOptions) {
            $agentOptions = new AgentOptions();
        }

        // No explicit logger given: reuse the one on the options, but only if it is callable.
        if (!$logger && isset($agentOptions->logger) && is_callable($agentOptions->logger)) {
            $logger = $agentOptions->logger;
        }

        // Give the run a unique reference when the options do not already carry one.
        if (!$agentOptions->reference()) {
            $agentOptions->reference = "TableInterpreterAgent_" . basename($pdfPath) . "_" . date("YmdHis") . "_" . uniqid();
        }

        $this->logger = $logger;
        // The logger is forwarded untouched; any validation of it is left to OCRAgent.
        $this->agent = new OCRAgent($pdfPath, $agentOptions, $logger);
        $this->agent->instructions(self::$instructions);

        if (!empty($tools)) {
            $this->agent->tools($tools);
        }
    }

    /**
     * Send the TSV baseline, the table candidates and (optionally) the planner plan
     * to the agent, then run it.
     *
     * @param OcrEngineResult $bundle   Source of the framed baseline and table-candidate text.
     * @param string|null     $planJson Planner plan; omitted when null or blank after trimming.
     * @return string JSON string (whatever OCRAgent::run() returns is passed through unchanged)
     */
    public function run(OcrEngineResult $bundle, $planJson = null)
    {
        $this->log("TableInterpreterAgent::run");
        $msg = array();

        $msg[] = "TSV_BASELINE:\n" . $bundle->baselineFramed();
        $msg[] = "TABLE_CANDIDATES:\n" . $bundle->tableCandidatesFramed();

        if ($planJson !== null && trim((string)$planJson) !== '') {
            $msg[] = "PLANNER_PLAN_JSON:\n" . (string)$planJson;
        }

        $this->agent->message($msg);

        return $this->agent->run();
    }

    /**
     * Forward a message to the configured logger, if there is a usable one.
     *
     * @param string $msg Message to log.
     * @return void
     */
    protected function log($msg)
    {
        // The explicit constructor logger is never validated, so check callability here:
        // call_user_func() on a non-callable warns on PHP 5/7 and throws TypeError on PHP 8.
        if ($this->logger && is_callable($this->logger)) {
            call_user_func($this->logger, $msg);
        }
    }
}
