<?php

namespace boru\ocr\Layout\Strategy;

use boru\ocr\Layout\Support\BoundsCalculator;
use boru\ocr\Layout\Support\LayoutDiagnostics;
use boru\ocr\Tesseract\Tsv\TsvRow;

/**
 * Reading-order strategy that groups lines into horizontal "bands" and reads
 * each band left-to-right, bands top-to-bottom.
 *
 * A band is anchored at the top coordinate of its first (top-most) line; every
 * following line whose top lies within the tolerance of that anchor joins the
 * same band. The anchor does not move while the band grows.
 */
class BandedOrderStrategy implements ReadingOrderStrategyInterface
{
    /** @var BoundsCalculator Used to obtain the bounding box of each line. */
    protected $bounds;

    /** @var int Maximum vertical distance (px) from a band's anchor top; never below 4. */
    protected $bandTolerancePx = 14;

    /**
     * @param BoundsCalculator $bounds  Bounds helper used by order().
     * @param array            $options Recognised key: 'bandTolerancePx' (cast to int,
     *                                  clamped to a minimum of 4).
     */
    public function __construct(BoundsCalculator $bounds, array $options = array())
    {
        $this->bounds = $bounds;
        if (isset($options['bandTolerancePx'])) {
            $this->bandTolerancePx = (int)$options['bandTolerancePx'];
        }
        if ($this->bandTolerancePx < 4) {
            $this->bandTolerancePx = 4;
        }
    }

    /**
     * Put lines into reading order.
     *
     * @param array                  $lines List of lines; each line is an array of TsvRow words.
     * @param LayoutDiagnostics|null $diag  Optional diagnostics; its `strategy` field is set to
     *                                      'banded' when it is still empty and banding was applied.
     *
     * @return array The lines (each with its words sorted left-to-right) in reading order.
     */
    public function order(array $lines, LayoutDiagnostics $diag = null)
    {
        // The index-based bookkeeping below needs contiguous 0..n-1 keys; normalise so
        // sparse or string-keyed input (e.g. the result of array_filter) works too.
        $lines = array_values($lines);

        // Sort the words of every line left->right.
        foreach ($lines as &$ln) {
            usort($ln, array(__CLASS__, 'cmpLeft'));
        }
        unset($ln); // break the dangling reference left by the by-reference foreach

        // Collect top/left of each line that has computable bounds.
        $meta = array();
        $count = count($lines);
        for ($i = 0; $i < $count; $i++) {
            $b = $this->bounds->lineBounds($lines[$i]);
            if ($b === null) {
                continue;
            }
            $meta[] = array('idx' => $i, 'top' => (int)$b['minTop'], 'left' => (int)$b['minLeft']);
        }

        // Nothing to band with fewer than two measurable lines: fall back to a plain
        // sort on the first word's top/left. Diagnostics are left untouched here.
        if (count($meta) < 2) {
            usort($lines, array(__CLASS__, 'cmpLineTopLeft'));
            return $lines;
        }

        usort($meta, array(__CLASS__, 'cmpMetaTopLeft'));

        // Walk the lines top-to-bottom and cut a new band whenever a line's top is
        // further than the tolerance from the current band's anchor.
        $bands = array();
        $current = array();
        $bandTop = null;
        $tol = (int)$this->bandTolerancePx;

        foreach ($meta as $m) {
            if ($bandTop === null) {
                $bandTop = $m['top'];
                $current[] = $m;
                continue;
            }
            if (abs($m['top'] - $bandTop) <= $tol) {
                $current[] = $m;
            } else {
                $bands[] = $current;
                $current = array($m);
                $bandTop = $m['top'];
            }
        }
        if (!empty($current)) {
            $bands[] = $current;
        }

        // Emit each band left->right.
        // NOTE(review): lines for which lineBounds() returned null are not part of $meta
        // and are therefore absent from the result on this path, while the fallback path
        // above keeps them - confirm this asymmetry is intended.
        $ordered = array();
        foreach ($bands as $band) {
            usort($band, array(__CLASS__, 'cmpMetaLeft'));
            foreach ($band as $m) {
                $ordered[] = $lines[$m['idx']];
            }
        }

        if ($diag) {
            // Do not overwrite a strategy name that was already recorded.
            $diag->strategy = $diag->strategy ? $diag->strategy : 'banded';
        }

        return $ordered;
    }

    /**
     * Word comparator: by left, then by top.
     *
     * @return int -1, 0 or 1
     */
    public static function cmpLeft(TsvRow $a, TsvRow $b)
    {
        if ($a->left === $b->left) {
            if ($a->top === $b->top) {
                return 0;
            }
            return ($a->top < $b->top) ? -1 : 1;
        }
        return ($a->left < $b->left) ? -1 : 1;
    }

    /**
     * Line-meta comparator: by 'top', then 'left', then original index ('idx', when present).
     *
     * The index tie-breaker makes the result deterministic on PHP versions whose usort()
     * is not stable (before 8.0) and equals what a stable sort would produce.
     *
     * @param array $a Meta entry with 'top' and 'left' (optionally 'idx').
     * @param array $b Meta entry with 'top' and 'left' (optionally 'idx').
     *
     * @return int -1, 0 or 1
     */
    public static function cmpMetaTopLeft($a, $b)
    {
        if ($a['top'] !== $b['top']) {
            return ($a['top'] < $b['top']) ? -1 : 1;
        }
        if ($a['left'] !== $b['left']) {
            return ($a['left'] < $b['left']) ? -1 : 1;
        }
        return self::cmpOptionalKey($a, $b, 'idx');
    }

    /**
     * Line-meta comparator used inside a band: by 'left', then 'top', then 'idx'
     * (the last two only when both entries carry the key).
     *
     * Without the tie-breakers, two lines of one band that share the same left edge
     * would be ordered arbitrarily by a non-stable usort() (PHP < 8.0).
     *
     * @param array $a Meta entry with 'left' (optionally 'top', 'idx').
     * @param array $b Meta entry with 'left' (optionally 'top', 'idx').
     *
     * @return int -1, 0 or 1
     */
    public static function cmpMetaLeft($a, $b)
    {
        if ($a['left'] !== $b['left']) {
            return ($a['left'] < $b['left']) ? -1 : 1;
        }
        $byTop = self::cmpOptionalKey($a, $b, 'top');
        if ($byTop !== 0) {
            return $byTop;
        }
        return self::cmpOptionalKey($a, $b, 'idx');
    }

    /**
     * Fallback line comparator: by the first word's top, then its left.
     * A line without a word at index 0 is treated as top = 0 / left = 0.
     *
     * @return int -1, 0 or 1
     */
    public static function cmpLineTopLeft(array $a, array $b)
    {
        // fallback only
        $ta = isset($a[0]) ? (int)$a[0]->top : 0;
        $tb = isset($b[0]) ? (int)$b[0]->top : 0;
        if ($ta === $tb) {
            $la = isset($a[0]) ? (int)$a[0]->left : 0;
            $lb = isset($b[0]) ? (int)$b[0]->left : 0;
            if ($la === $lb) {
                return 0;
            }
            return ($la < $lb) ? -1 : 1;
        }
        return ($ta < $tb) ? -1 : 1;
    }

    /**
     * Compare two meta entries on one key, treating the key as "equal" when either
     * side does not carry it (keeps the public comparators usable with partial entries).
     *
     * @param array  $a
     * @param array  $b
     * @param string $key
     *
     * @return int -1, 0 or 1
     */
    private static function cmpOptionalKey($a, $b, $key)
    {
        if (!isset($a[$key]) || !isset($b[$key]) || $a[$key] === $b[$key]) {
            return 0;
        }
        return ($a[$key] < $b[$key]) ? -1 : 1;
    }
}
