<?php

namespace boru\ocr\Layout\Support;

use boru\ocr\Layout\LayoutOptions;
use boru\ocr\Tesseract\Tsv\TsvRow;

/**
 * Table-oriented line renderer.
 *
 * Goal: preserve column alignment more aggressively than LineRenderer by
 * turning bbox gaps into multiple spaces (capped), so tabular data stays readable.
 *
 * This is still "plain text", not a strict CSV/TSV exporter.
 */
class TableLineRenderer
{
    /** @var bool Skip tokens whose text is empty, either as given or after normalization */
    protected $dropEmpty = true;

    /** @var int Pixels per space when expanding gaps */
    protected $pxPerSpace = 10;

    /** @var int Always insert at least this many spaces between tokens */
    protected $minSpaces = 1;

    /** @var int Cap to avoid ridiculous spacing on huge gaps */
    protected $maxSpaces = 24;

    /** @var int If gap <= this, treat as "normal" spacing (minSpaces) */
    protected $smallGapPx = 6;

    /** @var bool Trim each token's text before it is appended */
    protected $trimToken = true;

    /** @var LayoutOptions|null */
    protected $layoutOptions = null;

    /**
     * Build a renderer from layout options.
     *
     * The table-specific settings (tablePxPerSpace, tableMinSpaces,
     * tableMaxSpaces, tableSmallGapPx, tableTrimToken) are optional: each one
     * overrides the class default only when it is set on the options object.
     * Values are then clamped so the spacing math in spacesForGap() stays sane.
     *
     * @param LayoutOptions|null $options Passed through LayoutOptions::create();
     *                                    presumably null yields default options
     *                                    (confirm against LayoutOptions).
     */
    public function __construct(LayoutOptions $options = null)
    {
        $this->layoutOptions = LayoutOptions::create($options);
        $this->dropEmpty = $this->layoutOptions->dropEmpty;

        if (isset($this->layoutOptions->tablePxPerSpace)) {
            $this->pxPerSpace = (int)$this->layoutOptions->tablePxPerSpace;
        }
        if (isset($this->layoutOptions->tableMinSpaces)) {
            $this->minSpaces = (int)$this->layoutOptions->tableMinSpaces;
        }
        if (isset($this->layoutOptions->tableMaxSpaces)) {
            $this->maxSpaces = (int)$this->layoutOptions->tableMaxSpaces;
        }
        if (isset($this->layoutOptions->tableSmallGapPx)) {
            $this->smallGapPx = (int)$this->layoutOptions->tableSmallGapPx;
        }
        if (isset($this->layoutOptions->tableTrimToken)) {
            $this->trimToken = (bool)$this->layoutOptions->tableTrimToken;
        }

        // Clamp: pxPerSpace is a divisor (must be >= 1), tokens are always
        // separated by at least one space, and the cap can never fall below
        // the minimum.
        if ($this->pxPerSpace < 1) {
            $this->pxPerSpace = 1;
        }
        if ($this->minSpaces < 1) {
            $this->minSpaces = 1;
        }
        if ($this->maxSpaces < $this->minSpaces) {
            $this->maxSpaces = $this->minSpaces;
        }
        if ($this->smallGapPx < 0) {
            $this->smallGapPx = 0;
        }
    }

    /**
     * Render a TSV "line" (array of word rows) into a single string.
     *
     * Rows are ordered by their left edge, their text is whitespace-normalized
     * (tabs, CR and LF become spaces; optionally trimmed), and consecutive
     * tokens are joined by a run of spaces derived from the pixel gap between
     * the previous token's right edge and the current token's left edge.
     *
     * @param TsvRow[] $line
     * @return string
     */
    public function render(array $line)
    {
        if (!$line) {
            return '';
        }

        // Work left-to-right regardless of the order the rows arrived in.
        usort($line, array($this, 'cmpLeft'));

        $out = '';
        // Right edge of the last emitted token; null until a token is emitted.
        $prevRight = null;

        foreach ($line as $r) {
            // Cast once so a null/non-string text cannot slip past the ''
            // checks or reach str_replace()/trim() as a non-string.
            $t = (string)$r->text;

            if ($this->dropEmpty && $t === '') {
                continue;
            }

            // Normalize whitespace: control whitespace would break the
            // single-line, space-aligned output.
            $t = str_replace(array("\t", "\r", "\n"), ' ', $t);
            if ($this->trimToken) {
                $t = trim($t);
            }

            // A token may become empty only after normalization.
            if ($this->dropEmpty && $t === '') {
                continue;
            }

            // First emitted token: no separator. This is keyed on $prevRight
            // rather than on $out being empty, because with dropEmpty disabled
            // the first token may legitimately be '' and must still anchor the
            // gap computation for the token that follows it.
            if ($prevRight === null) {
                $out = $t;
                $prevRight = (int)$r->right();
                continue;
            }

            $gap = (int)$r->left - $prevRight;
            $out .= str_repeat(' ', $this->spacesForGap($gap)) . $t;

            $prevRight = (int)$r->right();
        }

        return $out;
    }

    /**
     * Convert pixel gap to #spaces (capped).
     *
     * Gaps up to smallGapPx (including negative gaps from overlapping boxes)
     * yield minSpaces. Larger gaps yield minSpaces + floor(gap / pxPerSpace),
     * limited to maxSpaces and never less than minSpaces.
     *
     * @param int $gapPx
     * @return int
     */
    protected function spacesForGap($gapPx)
    {
        $gapPx = (int)$gapPx;
        if ($gapPx <= $this->smallGapPx) {
            return $this->minSpaces;
        }

        $extra = (int)floor($gapPx / $this->pxPerSpace);

        // Cap first, then enforce the floor, so minSpaces wins if the two
        // bounds ever disagree.
        return max($this->minSpaces, min($this->maxSpaces, $this->minSpaces + $extra));
    }

    /**
     * usort() comparator ordering rows by ascending left edge.
     *
     * @param TsvRow $a
     * @param TsvRow $b
     * @return int -1, 0 or 1
     */
    public function cmpLeft($a, $b)
    {
        $leftA = (int)$a->left;
        $leftB = (int)$b->left;

        if ($leftA === $leftB) {
            return 0;
        }
        return ($leftA < $leftB) ? -1 : 1;
    }
}
