<?php

namespace boru\ocr\Layout\Diagram;

use boru\ocr\Layout\Support\RegionGeometry;
use boru\ocr\Layout\Support\RegionLineSelector;
use boru\ocr\Layout\Support\RegionSignature;

/**
 * Removes redundant regions from a list of diagram layout regions.
 *
 * Three independent passes are provided: suppression by geometric containment,
 * suppression of regions whose text signature nearly matches an already kept
 * region, and suppression of regions that duplicate the title block.
 *
 * All geometry is delegated to RegionGeometry; text comparison is done on the
 * strings produced by RegionSignature using PHP's similar_text() percentage.
 */
class DiagramRegionDeduper
{
    /** @var RegionLineSelector Used to obtain the lines belonging to a region. */
    protected $lineSelector;

    /** @var RegionSignature Used to turn a region's lines into a comparable string. */
    protected $signature;

    /**
     * @param RegionLineSelector $lineSelector
     * @param RegionSignature $signature
     */
    public function __construct(RegionLineSelector $lineSelector, RegionSignature $signature)
    {
        $this->lineSelector = $lineSelector;
        $this->signature = $signature;
    }

    /**
     * Suppress regions mostly contained in a larger region.
     *
     * Regions are sorted with RegionGeometry::cmpRegionAreaDesc and then visited
     * in that order; a region is discarded when its containment ratio against any
     * previously kept region is at least 0.88.
     *
     * With zero or one region the input is returned untouched (keys preserved).
     * Otherwise the result is a re-indexed list in the sorted order.
     *
     * @param array<int, array> $regions
     * @return array<int, array>
     */
    public function suppressContainedRegions(array $regions)
    {
        if (count($regions) <= 1) {
            return $regions;
        }

        usort($regions, array('boru\ocr\Layout\Support\RegionGeometry', 'cmpRegionAreaDesc'));

        $kept = array();

        foreach ($regions as $candidate) {
            $contained = false;

            foreach ($kept as $keeper) {
                if (RegionGeometry::containmentRatio($candidate, $keeper) >= 0.88) {
                    $contained = true;
                    break;
                }
            }

            if (!$contained) {
                $kept[] = $candidate;
            }
        }

        return $kept;
    }

    /**
     * Suppress sibling regions that contain essentially the same text.
     *
     * Regions are sorted with RegionGeometry::cmpRegionAreaDesc. A region is
     * discarded when some previously kept region has a vertical overlap ratio of
     * at least 0.85 with it AND a similar_text() percentage of at least 90 between
     * the two signatures. Regions whose signature is the empty string are always
     * kept and are never used as a reason to discard another region.
     *
     * With zero or one region the input is returned untouched (keys preserved).
     * Otherwise the result is a re-indexed list in the sorted order.
     *
     * @param array<int, array> $regions
     * @param array<int, array> $lines array<int, TsvRow[]>
     * @return array<int, array>
     */
    public function suppressEquivalentRegionsUsingLines(array $regions, array $lines)
    {
        if (count($regions) <= 1) {
            return $regions;
        }

        usort($regions, array('boru\ocr\Layout\Support\RegionGeometry', 'cmpRegionAreaDesc'));

        // $kept and $keptSigs are parallel lists: $keptSigs[$i] is the signature of $kept[$i].
        $kept = array();
        $keptSigs = array();

        foreach ($regions as $candidate) {
            $candidateLines = $this->lineSelector->linesInRegion($lines, $candidate);
            $candidateSig = $this->signature->signatureFromLines($candidateLines);

            $duplicate = false;

            // An empty signature gives nothing to compare, so such a region is kept as-is.
            if ($candidateSig !== '') {
                foreach ($kept as $idx => $keeper) {
                    if (RegionGeometry::verticalOverlapRatio($candidate, $keeper) < 0.85) {
                        continue;
                    }

                    $keeperSig = $keptSigs[$idx];
                    if ($keeperSig === '') {
                        continue;
                    }

                    // Argument order is kept as (candidate, keeper): similar_text()
                    // is not guaranteed to be symmetric.
                    similar_text($candidateSig, $keeperSig, $pct);
                    if ($pct >= 90.0) {
                        $duplicate = true;
                        break;
                    }
                }
            }

            if (!$duplicate) {
                $kept[] = $candidate;
                $keptSigs[] = $candidateSig;
            }
        }

        return $kept;
    }

    /**
     * Drop regions that overlap the title block heavily (prevents duplicate footer/title content).
     *
     * A region other than the title block is examined when its containment ratio
     * against the title block is at least 0.85 or its vertical overlap ratio is at
     * least 0.90. An examined region is dropped when:
     *  - both signatures are non-empty and their similar_text() percentage is at
     *    least 92, or
     *  - at least one signature is empty and the containment ratio is at least 0.90.
     *
     * The title block itself is always kept. When $titleIdx is null or does not
     * address an element of $regions the input is returned untouched; otherwise
     * the result is a re-indexed list in the original order.
     *
     * @param array<int, array> $regions
     * @param int|null $titleIdx Key of the title block within $regions. Values that
     *                           PHP casts to an existing key (e.g. "3") are accepted too.
     * @param array<int, array> $lines
     * @return array<int, array>
     */
    public function suppressRegionsOverlappingTitleBlock(array $regions, $titleIdx, array $lines)
    {
        if ($titleIdx === null) {
            return $regions;
        }
        if (!isset($regions[$titleIdx])) {
            return $regions;
        }

        // isset() above applied PHP's array-key casting ("3" => 3, 3.0 => 3, true => 1).
        // Resolve the key exactly as the array stores it, so the strict comparison in
        // the loop recognises the title block instead of comparing it with itself
        // (which would drop it from the result).
        $probe = array($titleIdx => true);
        $titleKey = key($probe);

        $titleBlock = $regions[$titleKey];

        $titleLines = $this->lineSelector->linesInRegion($lines, $titleBlock);
        $titleSig = $this->signature->signatureFromLines($titleLines);

        $out = array();
        foreach ($regions as $key => $region) {
            if ($key === $titleKey) {
                $out[] = $region;
                continue;
            }

            $contain = RegionGeometry::containmentRatio($region, $titleBlock);
            $vOverlap = RegionGeometry::verticalOverlapRatio($region, $titleBlock);

            if ($contain >= 0.85 || $vOverlap >= 0.90) {
                $regionLines = $this->lineSelector->linesInRegion($lines, $region);
                $regionSig = $this->signature->signatureFromLines($regionLines);

                if ($titleSig !== '' && $regionSig !== '') {
                    similar_text($titleSig, $regionSig, $pct);
                    if ($pct >= 92.0) {
                        continue; // drop: same text as the title block
                    }
                } elseif ($contain >= 0.90) {
                    // No text to compare on at least one side: fall back to geometry only.
                    continue;
                }
            }

            $out[] = $region;
        }

        return $out;
    }
}
