<?php

namespace boru\ocr\Layout\Support;

use boru\ocr\Layout\Detector\KeyValueTableDetector;
use boru\ocr\Layout\LayoutOptions;
use boru\ocr\Tesseract\Tsv\TsvRow;

/**
 * Detect "table-like" contiguous regions inside an ordered line list.
 *
 * Supports two kinds:
 *  - kind=grid      (lines sharing a repeated column layout, minCols+ columns)
 *  - kind=key_value (2-column label/value tables, scored by KeyValueTableDetector)
 *
 * Input: ordered lines, where each line is an array of TsvRow tokens.
 * Region "start"/"end" values are inclusive indexes into that line list.
 *
 * PHP 5.6+
 */
class TableRegionDetector
{
    /** @var BoundsCalculator */
    protected $bounds;

    // ---- grid (original) ----

    /** @var int Width in pixels of the horizontal buckets token left edges are quantised into. */
    protected $bucketPx = 18;
    /** @var int Number of lines in the sliding window used to score each line. */
    protected $window = 7;
    /** @var int Minimum qualifying lines in a window, and minimum length of a grid region. */
    protected $minLines = 5;
    /** @var float Minimum window score for a line to belong to a grid region. */
    protected $scoreThreshold = 0.55;
    /** @var int Minimum number of non-blank tokens in a window / grid region. */
    protected $minTokens = 60;
    /** @var int Minimum distinct column buckets for a line to count towards a window. */
    protected $minCols = 3;

    // ---- key/value ----

    /** @var bool Whether detect() also looks for key/value tables. */
    protected $detectKeyValueTables = true;
    /** @var KeyValueTableDetector */
    protected $keyValueDetector;

    /** @var LayoutOptions */
    protected $layoutOptions;

    /** @var OCRLogger|null */
    protected $logger = null;
    use \boru\ocr\Traits\OcrLogTrait;

    /**
     * @param BoundsCalculator   $bounds  Used to compute line and region bounding boxes.
     * @param LayoutOptions|null $options Optional options; normalised through LayoutOptions::create().
     */
    public function __construct(BoundsCalculator $bounds, LayoutOptions $options = null)
    {
        $this->layoutOptions = LayoutOptions::create($options);
        $this->logger = $this->layoutOptions->logger;
        $this->bounds = $bounds;

        $this->applyLayoutOptions($this->layoutOptions);

        // Clamp configured values to ranges the algorithms below can work with.
        if ($this->bucketPx < 2) $this->bucketPx = 2;
        if ($this->window < 3) $this->window = 3;
        if ($this->minLines < 2) $this->minLines = 2;
        if ($this->minTokens < 0) $this->minTokens = 0;
        if ($this->minCols < 2) $this->minCols = 2;
        if ($this->scoreThreshold < 0.01) $this->scoreThreshold = 0.01;
        if ($this->scoreThreshold > 1.0) $this->scoreThreshold = 1.0;

        // The key/value detector gets its own clone of the same option object.
        $kvOpts = clone $this->layoutOptions;
        $this->keyValueDetector = new KeyValueTableDetector($this->bounds, $kvOpts);
    }

    /**
     * Copy the tableRegion* / detectKeyValueTables settings that are set on
     * the options object onto this detector. Unset options keep their defaults.
     *
     * @param LayoutOptions $o
     * @return void
     */
    protected function applyLayoutOptions(LayoutOptions $o)
    {
        if (isset($o->tableRegionBucketPx)) $this->bucketPx = (int)$o->tableRegionBucketPx;
        if (isset($o->tableRegionWindow)) $this->window = (int)$o->tableRegionWindow;
        if (isset($o->tableRegionMinLines)) $this->minLines = (int)$o->tableRegionMinLines;
        if (isset($o->tableRegionScoreThreshold)) $this->scoreThreshold = (float)$o->tableRegionScoreThreshold;
        if (isset($o->tableRegionMinTokens)) $this->minTokens = (int)$o->tableRegionMinTokens;
        if (isset($o->tableRegionMinCols)) $this->minCols = (int)$o->tableRegionMinCols;

        if (isset($o->detectKeyValueTables)) $this->detectKeyValueTables = (bool)$o->detectKeyValueTables;
    }

    /**
     * Array-keyed counterpart of applyLayoutOptions(). Not called from within
     * this class; kept for subclasses.
     *
     * @param array $options
     * @return void
     */
    protected function applyArrayOptions(array $options)
    {
        if (isset($options['tableRegionBucketPx'])) $this->bucketPx = (int)$options['tableRegionBucketPx'];
        if (isset($options['tableRegionWindow'])) $this->window = (int)$options['tableRegionWindow'];
        if (isset($options['tableRegionMinLines'])) $this->minLines = (int)$options['tableRegionMinLines'];
        if (isset($options['tableRegionScoreThreshold'])) $this->scoreThreshold = (float)$options['tableRegionScoreThreshold'];
        if (isset($options['tableRegionMinTokens'])) $this->minTokens = (int)$options['tableRegionMinTokens'];
        if (isset($options['tableRegionMinCols'])) $this->minCols = (int)$options['tableRegionMinCols'];

        if (isset($options['detectKeyValueTables'])) $this->detectKeyValueTables = (bool)$options['detectKeyValueTables'];
    }

    /**
     * Return the options object's properties (those visible from this scope)
     * as a name => value array. Not called from within this class; kept for
     * subclasses.
     *
     * @param LayoutOptions $o
     * @return array
     */
    protected function layoutOptionsToArray(LayoutOptions $o)
    {
        return get_object_vars($o);
    }

    /**
     * Detects table-like regions.
     *
     * Grid and key/value detection run independently, each with its own
     * minimum line count; when both produce regions, overlapping ones are
     * reduced to a single winner (see mergeRegions()).
     *
     * @param array $orderedLines array<array<TsvRow>>
     * @param string $profile 'document'|'diagram' (optional; used to slightly relax thresholds)
     * @return array list of regions with keys:
     *   start,end,score,kind,cols,tokens,bbox,features
     */
    public function detect(array $orderedLines, $profile = 'document')
    {
        if (count($orderedLines) === 0) return array();

        // The grid detector applies its own minLines guard, so a short input
        // no longer prevents key/value detection from running.
        $gridRegions = $this->detectGridRegions($orderedLines, $profile);

        $kvRegions = array();
        if ($this->detectKeyValueTables) {
            $kvRegions = $this->detectKeyValueRegions($orderedLines, $profile);
        }

        // Backfill bbox for any region that didn't compute one.
        // This is critical for mapping detected regions back onto final reading order later.
        $gridRegions = $this->backfillBboxes($gridRegions, $orderedLines);
        $kvRegions = $this->backfillBboxes($kvRegions, $orderedLines);

        if (count($gridRegions) === 0) return $kvRegions;
        if (count($kvRegions) === 0) return $gridRegions;

        return $this->mergeRegions($gridRegions, $kvRegions);
    }

    /**
     * Fill in a missing/null 'bbox' on each region from the bounds of its lines.
     *
     * @param array $regions
     * @param array $orderedLines array<array<TsvRow>>
     * @return array the regions, with bbox backfilled where it could be computed
     */
    private function backfillBboxes(array $regions, array $orderedLines)
    {
        foreach ($regions as $idx => $rg) {
            // isset() is false for both a missing key and a null value.
            if (!isset($rg['bbox'])) {
                $regions[$idx]['bbox'] = $this->bboxForLineRange($orderedLines, $rg['start'], $rg['end']);
            }
        }
        return $regions;
    }

    /**
     * Combine grid and key/value regions into one list ordered by start line.
     *
     * When a region starts at or before the end of the previously kept one,
     * only one of the two survives: the higher score wins, and on an equal
     * score a previously kept grid region is retained.
     *
     * @param array $gridRegions
     * @param array $kvRegions
     * @return array
     */
    private function mergeRegions(array $gridRegions, array $kvRegions)
    {
        $all = array_merge($gridRegions, $kvRegions);
        usort($all, array($this, 'cmpRegionStartThenScore'));

        $out = array();
        foreach ($all as $rg) {
            $lastIdx = count($out) - 1;
            if ($lastIdx < 0 || (int)$rg['start'] > (int)$out[$lastIdx]['end']) {
                // first region, or no overlap with the last kept one
                $out[] = $rg;
                continue;
            }

            $last = $out[$lastIdx];
            $lastScore = (float)$last['score'];
            $rgScore = (float)$rg['score'];

            if ($lastScore > $rgScore) continue;
            if ($lastScore == $rgScore && isset($last['kind']) && $last['kind'] === 'grid') continue;

            $out[$lastIdx] = $rg;
        }

        return $out;
    }

    /**
     * Build a bounding box for a region by unioning line bounds in [start..end].
     *
     * The range is clamped to the available lines; lines without bounds are skipped.
     *
     * @param array $orderedLines array<array<TsvRow>>
     * @param int $start
     * @param int $end
     * @return array|null {x,y,w,h} or null
     */
    protected function bboxForLineRange(array $orderedLines, $start, $end)
    {
        $start = (int)$start;
        $end   = (int)$end;
        $lineCount = count($orderedLines);

        if ($start < 0) $start = 0;
        if ($end >= $lineCount) $end = $lineCount - 1;
        if ($end < $start) return null;

        $minL = $minT = null;
        $maxR = $maxB = null;

        for ($i = $start; $i <= $end; $i++) {
            if (!isset($orderedLines[$i])) continue;

            $bb = $this->bounds->lineBounds($orderedLines[$i]);
            if ($bb === null) continue;

            $l = (int)$bb['left'];
            $t = (int)$bb['top'];
            $r = $l + (int)$bb['width'];
            $b = $t + (int)$bb['height'];

            if ($minL === null || $l < $minL) $minL = $l;
            if ($minT === null || $t < $minT) $minT = $t;
            if ($maxR === null || $r > $maxR) $maxR = $r;
            if ($maxB === null || $b > $maxB) $maxB = $b;
        }

        if ($minL === null) return null;

        return array(
            'x' => $minL,
            'y' => $minT,
            'w' => $maxR - $minL,
            'h' => $maxB - $minT,
        );
    }

    /**
     * Convert the bounds of a set of lines into the {x,y,w,h} region shape.
     *
     * @param array $rows non-empty lines of the region
     * @return array|null {x,y,w,h}, or null when there are no rows or no bounds
     */
    private function bboxFromRows(array $rows)
    {
        $bb = (count($rows) > 0) ? $this->bounds->pageBoundsFromRows($rows) : null;
        if (!$bb) return null;

        return array('x' => $bb['left'], 'y' => $bb['top'], 'w' => $bb['width'], 'h' => $bb['height']);
    }

    /**
     * Summarise one line: which column buckets it occupies and how many
     * non-blank tokens it has.
     *
     * Entries that are not TsvRow instances, and tokens whose trimmed text is
     * empty, are ignored. A token's bucket is its left coordinate divided by
     * bucketPx, rounded.
     *
     * @param mixed $line array<TsvRow> (anything empty yields an empty profile)
     * @return array {buckets: int[] sorted ascending, tokens: int}
     */
    private function profileLine($line)
    {
        $buckets = array();
        $tokens = 0;

        if ($line && count($line) > 0) {
            foreach ($line as $row) {
                if (!($row instanceof TsvRow)) continue;
                if (trim((string)$row->text) === '') continue;

                $tokens++;
                $bucket = (int)round(((int)$row->left) / (float)$this->bucketPx);
                $buckets[$bucket] = 1;
            }
        }

        $keys = array_keys($buckets);
        sort($keys);

        return array('buckets' => $keys, 'tokens' => $tokens);
    }


    // ---------------- GRID detection (existing behavior) ----------------

    /**
     * Find runs of lines whose column layout repeats.
     *
     * Each line gets a signature (its sorted column buckets). Every line is
     * then scored over a sliding window of $window lines: 65% from how often
     * the most common signature occurs, 35% from how often the most common
     * column count occurs, considering only lines with at least minCols
     * buckets. Consecutive lines scoring at or above the threshold form a
     * candidate region, which regionFromSpan() validates.
     *
     * @param array $orderedLines array<array<TsvRow>>
     * @param string $profile 'diagram' adds 0.04 to scores and lowers the threshold by 0.06
     * @return array list of grid regions
     */
    protected function detectGridRegions(array $orderedLines, $profile)
    {
        $n = count($orderedLines);
        // A grid region needs at least minLines lines, so shorter input cannot contain one.
        if ($n < $this->minLines) return array();

        $isDiagram = ($profile === 'diagram');

        $sigs = array();
        $colCounts = array();
        $tokenCounts = array();

        for ($i = 0; $i < $n; $i++) {
            $lineProfile = $this->profileLine($orderedLines[$i]);
            $sigs[$i] = implode('-', $lineProfile['buckets']);
            $colCounts[$i] = count($lineProfile['buckets']);
            $tokenCounts[$i] = $lineProfile['tokens'];
        }

        // sliding window score per line center
        $half = (int)floor($this->window / 2);
        $scores = array();
        for ($i = 0; $i < $n; $i++) {
            // Centre the window on $i, then shift it back inside [0, n-1] near the edges.
            $w0 = max(0, $i - $half);
            $w1 = min($n - 1, $w0 + $this->window - 1);
            $w0 = max(0, $w1 - $this->window + 1);

            $hist = array();
            $cc = array();
            $tokSum = 0;
            $lineCount = 0;

            for ($j = $w0; $j <= $w1; $j++) {
                if ($sigs[$j] === '') continue;
                if ((int)$colCounts[$j] < (int)$this->minCols) continue;

                $lineCount++;
                $tokSum += (int)$tokenCounts[$j];

                if (!isset($hist[$sigs[$j]])) $hist[$sigs[$j]] = 0;
                $hist[$sigs[$j]]++;

                if (!isset($cc[$colCounts[$j]])) $cc[$colCounts[$j]] = 0;
                $cc[$colCounts[$j]]++;
            }

            if ($lineCount < $this->minLines || $tokSum < $this->minTokens) {
                $scores[$i] = 0.0;
                continue;
            }

            // repetition score: dominant signature frequency
            $dominant = (count($hist) > 0) ? (float)max($hist) / (float)$lineCount : 0.0;

            // stability score: dominant column count frequency
            $ccdom = (count($cc) > 0) ? (float)max($cc) / (float)$lineCount : 0.0;

            // combined score
            $score = 0.65*$dominant + 0.35*$ccdom;

            // diagrams tend to be noisier; allow a small boost in diagram mode
            if ($isDiagram) $score += 0.04;

            if ($score > 1.0) $score = 1.0;
            $scores[$i] = $score;
        }

        // turn high-score runs into regions
        $threshold = $this->scoreThreshold;
        if ($isDiagram) $threshold = max(0.01, $threshold - 0.06);

        $regions = array();
        $in = false;
        $start = 0;
        $best = 0.0;

        for ($i = 0; $i < $n; $i++) {
            $score = (float)$scores[$i];

            if ($score >= (float)$threshold) {
                if (!$in) {
                    $in = true;
                    $start = $i;
                    $best = $score;
                } elseif ($score > $best) {
                    $best = $score;
                }
                continue;
            }

            if ($in) {
                // run ended on the previous line
                $rg = $this->regionFromSpan($orderedLines, $start, $i - 1, $best, 'grid');
                if ($rg !== null) $regions[] = $rg;
                $in = false;
            }
        }

        if ($in) {
            // run extends to the last line
            $rg = $this->regionFromSpan($orderedLines, $start, $n - 1, $best, 'grid');
            if ($rg !== null) $regions[] = $rg;
        }

        return $regions;
    }

    // ---------------- KEY/VALUE detection ----------------

    /**
     * Find key/value table regions using the KeyValueTableDetector.
     *
     * Greedy left-to-right scan: for each start line, every span from the
     * detector's minimum length up to a maximum length is scored, the best
     * span meeting the token and score thresholds is kept, and scanning
     * resumes after it.
     *
     * @param array $orderedLines array<array<TsvRow>>
     * @param string $profile 'diagram' lowers the score threshold by 0.08
     * @return array list of key_value regions
     */
    protected function detectKeyValueRegions(array $orderedLines, $profile)
    {
        $n = count($orderedLines);
        // Guard against a non-positive minimum, which would produce empty slices.
        $minLines = max(1, (int)$this->keyValueDetector->minLines());
        $minTokens = (int)$this->keyValueDetector->minTokens();

        // In diagrams, KV tables are common and OCR is noisy: allow small relaxation
        $threshold = (float)$this->keyValueDetector->threshold();
        if ($profile === 'diagram') $threshold = max(0.01, $threshold - 0.08);

        // Greedy scan for best segment starting at i within maxLen
        $maxLen = max($minLines, (int)($this->window + 8));
        if ($maxLen < 8) $maxLen = 8;

        $regions = array();
        $i = 0;
        while ($i <= $n - $minLines) {
            $best = null;
            $bestScore = 0.0;

            $maxEnd = min($n - 1, $i + $maxLen - 1);
            for ($end = $i + $minLines - 1; $end <= $maxEnd; $end++) {
                $slice = array_slice($orderedLines, $i, ($end - $i + 1));
                $s = $this->keyValueDetector->scoreLines($slice);

                if ((int)$s['tokenSum'] < $minTokens) continue;
                if ((float)$s['score'] < $threshold) continue;

                if ((float)$s['score'] > $bestScore) {
                    $bestScore = (float)$s['score'];
                    $best = array(
                        'start' => $i,
                        'end' => $end,
                        'score' => (float)$s['score'],
                        'kind' => 'key_value',
                        'cols' => 2,
                        'tokens' => (int)$s['tokenSum'],
                        'features' => $s['features'],
                    );
                }
            }

            if ($best === null) {
                $i++;
                continue;
            }

            // compute bbox from the non-empty lines of the winning span
            $rows = array();
            for ($k = (int)$best['start']; $k <= (int)$best['end']; $k++) {
                $ln = $orderedLines[$k];
                if ($ln && count($ln) > 0) $rows[] = $ln;
            }
            $best['bbox'] = $this->bboxFromRows($rows);

            $regions[] = $best;
            $i = (int)$best['end'] + 1;
        }

        return $regions;
    }

    // ---------------- region helpers ----------------

    /**
     * Build a region record for lines [start..end], or null if the span is
     * shorter than minLines or holds fewer than minTokens non-blank tokens.
     *
     * Tokens are counted the same way as during window scoring (non-blank
     * TsvRow entries only). 'cols' is the largest column-bucket count of any
     * line in the span.
     *
     * @param array $orderedLines array<array<TsvRow>>
     * @param int $start first line index (inclusive)
     * @param int $end last line index (inclusive)
     * @param float $score score to report for the region
     * @param string $kind region kind label
     * @return array|null
     */
    protected function regionFromSpan(array $orderedLines, $start, $end, $score, $kind)
    {
        $start = (int)$start;
        $end = (int)$end;

        if (($end - $start + 1) < $this->minLines) return null;

        $rows = array();
        $tokSum = 0;
        $maxCols = 0;

        for ($k = $start; $k <= $end; $k++) {
            $ln = $orderedLines[$k];
            if (!$ln || count($ln) === 0) continue;
            $rows[] = $ln;

            $lineProfile = $this->profileLine($ln);
            $tokSum += $lineProfile['tokens'];

            $cc = count($lineProfile['buckets']);
            if ($cc > $maxCols) $maxCols = $cc;
        }

        if ($tokSum < $this->minTokens) return null;

        return array(
            'start' => $start,
            'end' => $end,
            'score' => (float)$score,
            'kind' => $kind,
            'cols' => (int)$maxCols,
            'tokens' => (int)$tokSum,
            'bbox' => $this->bboxFromRows($rows),
            'features' => array(
                'bucketPx' => (int)$this->bucketPx,
                'window' => (int)$this->window,
            ),
        );
    }

    /**
     * usort() comparator for regions: ascending start, then descending score.
     *
     * @param array $a
     * @param array $b
     * @return int
     */
    public function cmpRegionStartThenScore($a, $b)
    {
        $as = (int)$a['start']; $bs = (int)$b['start'];
        if ($as !== $bs) {
            return ($as < $bs) ? -1 : 1;
        }

        $sa = (float)$a['score']; $sb = (float)$b['score'];
        if ($sa === $sb) return 0;
        return ($sa > $sb) ? -1 : 1; // higher score first
    }

    /**
     * usort() comparator ordering tokens by ascending left coordinate.
     * No longer used by this class itself; kept because it is public.
     *
     * @param TsvRow $a
     * @param TsvRow $b
     * @return int
     */
    public function cmpLeft(TsvRow $a, TsvRow $b)
    {
        if ($a->left === $b->left) return 0;
        return ($a->left < $b->left) ? -1 : 1;
    }
}
