<?php

namespace boru\ocr\Tesseract\Tsv;

/**
 * Holds the TSV rows belonging to one page, together with that page's number,
 * and offers simple filtering/grouping views over those rows.
 */
class TsvPage
{
    /**
     * Page number, stored as an integer.
     *
     * @var int
     */
    public $pageNumber;

    /**
     * Rows of this page, in the order they were supplied.
     *
     * @var TsvRow[]
     */
    public $rows = array();

    /**
     * @param int|string $pageNumber Page number; cast to int before storing.
     * @param TsvRow[]   $rows       Rows for this page (stored as given).
     */
    public function __construct($pageNumber, array $rows = array())
    {
        $this->rows = $rows;
        $this->pageNumber = (int) $pageNumber;
    }

    /**
     * Select only the word-level rows, keeping their original relative order.
     *
     * @return TsvRow[] Sequentially indexed list of rows whose level is 5.
     */
    public function words()
    {
        // In tesseract TSV, level 5 is word level.
        $wordRows = array_filter($this->rows, function ($row) {
            return (int) $row->level === 5;
        });

        // array_filter() preserves the source keys; reindex to a 0-based list.
        return array_values($wordRows);
    }

    /**
     * Bucket every row (of any level) by its block, paragraph and line numbers.
     *
     * Keys have the form "block:par:line" and appear in first-seen order; rows
     * inside each bucket keep the order they have in $this->rows.
     *
     * @return array<string, TsvRow[]>
     */
    public function byLine()
    {
        $buckets = array();

        foreach ($this->rows as $row) {
            $key = "{$row->block_num}:{$row->par_num}:{$row->line_num}";

            if (!array_key_exists($key, $buckets)) {
                $buckets[$key] = array();
            }

            $buckets[$key][] = $row;
        }

        return $buckets;
    }
}
