<?php

namespace boru\ocr\Layout\Support;

use boru\ocr\Tesseract\Tsv\TsvRow;

/**
 * Groups OCR TSV rows into lines.
 *
 * Rows are grouped by their block_num/par_num/line_num triple when any row
 * carries one; otherwise rows are clustered by their `top` coordinate.
 */
class LineGrouper
{
    /**
     * Fallback clustering tolerance: a row joins the current line when the
     * absolute difference between its `top` and the `top` of the first row
     * of that line is at most this value.
     *
     * @var int
     */
    protected $fallbackTopClusterPx = 8;

    /**
     * @param array $options Supported key: 'fallbackTopClusterPx' (cast to int).
     */
    public function __construct(array $options = array())
    {
        if (isset($options['fallbackTopClusterPx'])) {
            $this->fallbackTopClusterPx = (int)$options['fallbackTopClusterPx'];
        }
    }

    /**
     * Group rows into lines using block/par/line when available.
     *
     * When at least one row has a positive block_num, par_num or line_num,
     * rows are bucketed by that triple; lines appear in first-seen order and
     * rows keep their input order inside each line.
     *
     * Otherwise rows are clustered by `top` (see $fallbackTopClusterPx).
     * Lines are returned top-to-bottom and the rows of each line are ordered
     * left-to-right.
     *
     * @param TsvRow[] $rows
     * @return array<int, TsvRow[]>
     */
    public function groupIntoLines(array $rows)
    {
        if ($this->hasStructure($rows)) {
            return $this->groupByStructure($rows);
        }

        return $this->clusterByTop($rows);
    }

    /**
     * Order rows by `top` ascending, breaking ties by `left` ascending.
     *
     * @return int -1, 0 or 1, as expected by usort().
     */
    public static function cmpTopThenLeft(TsvRow $a, TsvRow $b)
    {
        if ($a->top === $b->top) {
            if ($a->left === $b->left) return 0;
            return ($a->left < $b->left) ? -1 : 1;
        }
        return ($a->top < $b->top) ? -1 : 1;
    }

    /**
     * Tell whether any row carries a positive block, paragraph or line number.
     *
     * @param TsvRow[] $rows
     * @return bool
     */
    private function hasStructure(array $rows)
    {
        foreach ($rows as $r) {
            if ($r->block_num > 0 || $r->par_num > 0 || $r->line_num > 0) {
                return true;
            }
        }

        return false;
    }

    /**
     * Bucket rows by their "block:par:line" key, preserving input order.
     *
     * @param TsvRow[] $rows
     * @return array<int, TsvRow[]>
     */
    private function groupByStructure(array $rows)
    {
        $groups = array();
        foreach ($rows as $r) {
            $key = $r->block_num . ':' . $r->par_num . ':' . $r->line_num;
            if (!isset($groups[$key])) {
                $groups[$key] = array();
            }
            $groups[$key][] = $r;
        }

        // Drop the composite keys; callers get a plain list of lines.
        return array_values($groups);
    }

    /**
     * Fallback: cluster rows into lines by vertical position.
     *
     * @param TsvRow[] $rows
     * @return array<int, TsvRow[]>
     */
    private function clusterByTop(array $rows)
    {
        // Sort top-to-bottom so each line's rows are contiguous and the first
        // row of a line (its anchor) has the smallest `top` in that line.
        usort($rows, array(__CLASS__, 'cmpTopThenLeft'));

        $lines = array();
        $current = array();
        $anchorTop = null;
        $threshold = (int)$this->fallbackTopClusterPx;

        foreach ($rows as $r) {
            if ($anchorTop === null) {
                $anchorTop = $r->top;
                $current[] = $r;
                continue;
            }
            // Distance is measured against the line's first row, not the
            // previous row, so a line cannot drift downwards indefinitely.
            if (abs($r->top - $anchorTop) <= $threshold) {
                $current[] = $r;
                continue;
            }
            $lines[] = self::sortedLeftToRight($current);
            $current = array($r);
            $anchorTop = $r->top;
        }
        if (!empty($current)) {
            $lines[] = self::sortedLeftToRight($current);
        }

        return $lines;
    }

    /**
     * Return the rows of one clustered line ordered by `left`.
     *
     * The cluster arrives ordered by `top` first, so rows whose `top` values
     * differ slightly within the threshold would otherwise be emitted in
     * vertical-jitter order instead of horizontal order.
     *
     * @param TsvRow[] $line
     * @return TsvRow[]
     */
    private static function sortedLeftToRight(array $line)
    {
        usort($line, array(__CLASS__, 'cmpLeftThenTop'));

        return $line;
    }

    /**
     * Order rows by `left` ascending, breaking ties by `top` ascending.
     *
     * @return int -1, 0 or 1, as expected by usort().
     */
    private static function cmpLeftThenTop(TsvRow $a, TsvRow $b)
    {
        if ($a->left === $b->left) {
            if ($a->top === $b->top) return 0;
            return ($a->top < $b->top) ? -1 : 1;
        }
        return ($a->left < $b->left) ? -1 : 1;
    }
}
