<?php

namespace boru\ocr\Layout;

use boru\ocr\Tesseract\Tsv\TsvPage;
use boru\ocr\Layout\Detector\LayoutProfileDetector;
use boru\ocr\Layout\Support\BoundsCalculator;
use boru\ocr\Layout\Support\LineGrouper;
use boru\ocr\Layout\Support\LineRenderer;
use boru\ocr\Layout\Support\LayoutDiagnostics;
use boru\ocr\Layout\Strategy\BandedOrderStrategy;
use boru\ocr\Layout\Strategy\LegacyOrderStrategy;
use boru\ocr\Layout\Strategy\AutoDocumentStrategy;
use boru\ocr\Layout\Strategy\DiagramLayoutStrategy;
use boru\ocr\Layout\Detector\TableLayoutDetector;
use boru\ocr\Layout\Strategy\TableLayoutStrategy;
use boru\ocr\Layout\Support\TableLineRenderer;


/**
 * Facade: builds readable baseline page text from a Tesseract TSV page.
 *
 * This class keeps the public API small while delegating to:
 * - a row selector/filter (confidence + empty-text filtering, done here)
 * - a line grouper and line renderer
 * - a profile detector (used when layoutProfile is "auto")
 * - ordering strategies for documents and a dedicated diagram strategy
 *
 * NOTE(review): "table" is accepted as a layoutProfile value and the table
 * detector/strategy/renderer are constructed, but buildPageText() never
 * invokes them; any profile other than "diagram" is currently rendered
 * through the document path. Confirm whether table routing is still pending.
 */
class TsvLayoutBuilder
{
    /** @var int Minimum confidence for level-5 rows; lower-confidence rows are dropped. */
    protected $minConf = 0;

    /** @var bool When true, rows whose text is empty or whitespace-only are dropped. */
    protected $dropEmpty = true;

    /** @var string auto|document|diagram|table */
    protected $layoutProfile = 'auto';

    /** @var bool When true, diagnostics of the last buildPageText() call are retained. */
    protected $diagnosticsEnabled = false;

    /** @var LayoutDiagnostics|null */
    protected $lastDiagnostics = null;

    // helpers
    /** @var BoundsCalculator */
    protected $bounds;

    /** @var LineGrouper */
    protected $grouper;

    /** @var LineRenderer */
    protected $renderer;

    // detector
    /** @var LayoutProfileDetector */
    protected $detector;

    // strategies
    /** @var BandedOrderStrategy */
    protected $banded;

    /** @var LegacyOrderStrategy */
    protected $legacy;

    /** @var AutoDocumentStrategy */
    protected $autoDoc;

    /** @var DiagramLayoutStrategy */
    protected $diagram;

    /** @var TableLayoutDetector */
    protected $tableDetector;

    /** @var TableLayoutStrategy */
    protected $table;

    /** @var TableLineRenderer */
    protected $tableRenderer;


    /**
     * Reads the options handled by this class and wires up all collaborators.
     *
     * The full $options array is forwarded unchanged to the grouper, renderers,
     * detectors and strategies; the options listed under "Rendering",
     * "Document ordering" and "Diagram" are consumed by those collaborators,
     * not by this class (defaults shown are as documented for them).
     *
     * @param array $options
     *   Common:
     *     - minConf (int) default 0
     *     - dropEmpty (bool) default true
     *     - diagnostics (bool) default false
     *     - layoutProfile (string) default auto (auto|document|diagram|table);
     *       any other value falls back to auto
     *
     *   Rendering:
     *     - preserveGapSpacing (bool) default false
     *     - gapExtraSpacePx (int) default 12
     *
     *   Document ordering:
     *     - bandTolerancePx (int) default 14
     *     - headerBandPx (int) default 300
     *     - multiColumn (bool) default true
     *     - detectMixedLayout (bool) default true
     *
     *   Diagram:
     *     - diagramScoreThreshold (float) default 0.62
     *     - clusterPadPx (int) default 70
     *     - minRegionWords (int) default 8
     */
    public function __construct(array $options = array())
    {
        if (isset($options['minConf'])) {
            $this->minConf = (int)$options['minConf'];
        }
        // array_key_exists so that an explicit null/false is honoured as "off".
        if (array_key_exists('dropEmpty', $options)) {
            $this->dropEmpty = (bool)$options['dropEmpty'];
        }
        if (array_key_exists('diagnostics', $options)) {
            $this->diagnosticsEnabled = (bool)$options['diagnostics'];
        }
        if (isset($options['layoutProfile'])) {
            $this->layoutProfile = (string)$options['layoutProfile'];
        }
        // Unknown profile names silently fall back to automatic detection.
        if (!in_array($this->layoutProfile, array('auto', 'document', 'diagram', 'table'), true)) {
            $this->layoutProfile = 'auto';
        }

        $this->bounds = new BoundsCalculator();
        $this->grouper = new LineGrouper($options);
        $this->renderer = new LineRenderer($options);

        $this->detector = new LayoutProfileDetector($this->bounds, $options);
        $this->tableDetector = new TableLayoutDetector($this->bounds, $options);
        $this->tableRenderer = new TableLineRenderer($options);
        $this->table = new TableLayoutStrategy($this->bounds, $options);

        $this->banded = new BandedOrderStrategy($this->bounds, $options);
        $this->legacy = new LegacyOrderStrategy($this->bounds, $options);
        $this->autoDoc = new AutoDocumentStrategy($this->bounds, $this->banded, $this->legacy, $options);

        $this->diagram = new DiagramLayoutStrategy($this->bounds, $this->banded, $this->renderer, $options);
    }

    /**
     * Returns the diagnostics captured by the most recent buildPageText() call.
     *
     * @return array|null diagnostics array, or null when diagnostics are
     *                    disabled or buildPageText() has not been called yet
     */
    public function getLastDiagnostics()
    {
        if (!$this->lastDiagnostics) {
            return null;
        }
        return $this->lastDiagnostics->toArray();
    }

    /**
     * Main entrypoint: converts one TSV page into plain text.
     *
     * Resets the stored diagnostics, filters the page rows, groups them into
     * lines, resolves the layout profile (detected when "auto", otherwise
     * forced by the layoutProfile option) and renders the text with the
     * matching strategy.
     *
     * @param TsvPage $page
     * @return string
     */
    public function buildPageText(TsvPage $page)
    {
        // Clear first so stale diagnostics are never returned for a new page.
        $this->lastDiagnostics = null;

        $diag = new LayoutDiagnostics();
        $diag->page = (int)$page->pageNumber;

        // 1) Select rows (prefer words), apply confidence filtering
        $rows = $this->selectRows($page);

        // 2) Group into lines
        $lines = $this->grouper->groupIntoLines($rows);

        // 3) Decide profile
        $profile = $this->layoutProfile;

        if ($profile === 'auto') {
            $decision = $this->detector->detect($page, $rows, $lines);
            $profile = $decision->profile;
            $diag->profileDecision = $decision->toArray();
        } else {
            $diag->profileDecision = array('profile' => $profile, 'confidence' => 1.0, 'features' => array(), 'notes' => array('Forced by layoutProfile option.'));
        }

        $diag->profile = $profile;

        // 4) Build output
        if ($profile === 'diagram') {
            $text = $this->diagram->buildText($rows, $lines, $diag);
        } else {
            // Document path. NOTE(review): a "table" profile also lands here;
            // the table strategy/renderer are not used by this method.
            $ordered = $this->autoDoc->order($lines, $diag);
            $out = array();
            foreach ($ordered as $ln) {
                $t = trim($this->renderer->renderLine($ln));
                // Skip lines that render to nothing so the output has no blank rows.
                if ($t !== '') {
                    $out[] = $t;
                }
            }
            $text = implode("\n", $out);
        }

        if ($this->diagnosticsEnabled) {
            $this->lastDiagnostics = $diag;
        }

        return $text;
    }

    /**
     * Prefer word rows when available; filter empty and low-confidence rows.
     *
     * Falls back to all page rows when the page reports no words. With
     * dropEmpty enabled, rows whose text is null, empty or whitespace-only are
     * discarded. The confidence threshold is applied only to level-5 rows.
     *
     * @param TsvPage $page
     * @return array
     */
    protected function selectRows(TsvPage $page)
    {
        $words = $page->words();
        $rows = ($words && count($words) > 0) ? $words : $page->rows;

        $out = array();
        foreach ($rows as $r) {
            // Cast before trim so a null text is treated as empty instead of
            // slipping past a strict '' comparison (and to avoid trim(null)).
            if ($this->dropEmpty && trim((string)$r->text) === '') {
                continue;
            }

            // Only level-5 rows are subject to the confidence threshold.
            if ((int)$r->level === 5 && $r->conf < $this->minConf) {
                continue;
            }

            $out[] = $r;
        }

        return $out;
    }
}
