<?php

namespace boru\ocr\Tesseract;

use boru\ocr\Page\PageImageProviderInterface;
use boru\ocr\Tesseract\Tsv\TsvParser;
use boru\ocr\Tesseract\Tsv\TsvPage;

/**
 * Runs Tesseract in TSV mode over every page supplied by a page image
 * provider and wraps the parsed rows in one TsvPage per document page.
 */
class TesseractTsvOcr
{
    /** @var PageImageProviderInterface Source of the page entries to OCR. */
    protected $pageProvider;

    /** @var TesseractCliRunner Invoked once per image (or per tile) with the 'tsv' output format. */
    protected $runner;

    /** @var TsvParser Turns the raw runner output into row objects. */
    protected $parser;

    /**
     * @param PageImageProviderInterface $pageProvider
     * @param TesseractCliRunner         $runner
     * @param TsvParser                  $parser
     */
    public function __construct(PageImageProviderInterface $pageProvider, TesseractCliRunner $runner, TsvParser $parser)
    {
        $this->pageProvider = $pageProvider;
        $this->runner = $runner;
        $this->parser = $parser;
    }

    /**
     * Returns array of TsvPage, one per document page.
     *
     * Each entry returned by the page provider is handled by its type:
     *  - string: passed to the runner as a single image; its parsed rows
     *    become the page's rows.
     *  - array: treated as a list of tiles. Each tile needs a 'path' key
     *    (tiles without one are skipped) and may carry 'offset_x' /
     *    'offset_y', which are cast to int and default to 0. Every row
     *    parsed from a tile has applyOffset() called with those offsets
     *    before being merged into the page's rows.
     *  - anything else: yields a TsvPage with no rows.
     *
     * Page numbers are 1-based and follow iteration order; every entry,
     * whatever its type, consumes exactly one page number.
     *
     * @param TesseractOptions $opts Passed through unchanged to every runner call.
     * @return TsvPage[]
     */
    public function ocrDocument(TesseractOptions $opts)
    {
        $pages = $this->pageProvider->getPages();
        $out = array();

        $pageNumber = 0;
        foreach ($pages as $pageEntry) {
            // Advance the counter first so that no branch below can skip it.
            // Previously the increment sat after `continue` statements and
            // was never reached for string or array entries, leaving every
            // processed page numbered 1.
            $pageNumber++;

            if (is_string($pageEntry)) {
                // single image -> parse rows
                $raw = $this->runner->run($pageEntry, $opts, 'tsv');
                $rows = $this->parser->parseRows($raw);
            } elseif (is_array($pageEntry)) {
                // tiled images -> parse each tile, offset, merge
                $rows = array();

                foreach ($pageEntry as $tile) {
                    if (!isset($tile['path'])) {
                        continue;
                    }

                    $dx = isset($tile['offset_x']) ? (int)$tile['offset_x'] : 0;
                    $dy = isset($tile['offset_y']) ? (int)$tile['offset_y'] : 0;

                    $raw = $this->runner->run($tile['path'], $opts, 'tsv');

                    foreach ($this->parser->parseRows($raw) as $row) {
                        // Presumably shifts tile-local coordinates into page
                        // coordinates -- confirm against the row class.
                        $row->applyOffset($dx, $dy);
                        $rows[] = $row;
                    }
                }
            } else {
                // Unsupported entry type: keep the page slot, but with no rows.
                $rows = array();
            }

            $out[] = new TsvPage($pageNumber, $rows);
        }

        return $out;
    }
}
